From 819cbb197113497f6af97999239ddec2553911ed Mon Sep 17 00:00:00 2001 From: Chang Y Date: Fri, 29 Jul 2022 23:45:47 -0500 Subject: [PATCH 01/35] Update ensembl_release_versions.py --- pyensembl/ensembl_release_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py index 1b86d24..c19279e 100644 --- a/pyensembl/ensembl_release_versions.py +++ b/pyensembl/ensembl_release_versions.py @@ -11,7 +11,7 @@ # limitations under the License. MIN_ENSEMBL_RELEASE = 54 -MAX_ENSEMBL_RELEASE = 106 +MAX_ENSEMBL_RELEASE = 107 def check_release_number(release): """ From 14634449db472812041ad2a50480fbfea1ef3426 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Thu, 18 Aug 2022 16:31:06 -0500 Subject: [PATCH 02/35] fix naming --- pyensembl/fasta.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyensembl/fasta.py b/pyensembl/fasta.py index 0893a81..9c724db 100644 --- a/pyensembl/fasta.py +++ b/pyensembl/fasta.py @@ -51,9 +51,10 @@ def _parse_header_id(line): # .e.g. # "ENST00000448914.1" instead of "ENST00000448914" # So now we have to parse out the identifier - dot_index = identifier.find(b".") - if dot_index >= 0: - identifier = identifier[:dot_index] + if identifier.startswith(b"ENS"): + dot_index = identifier.find(b".") + if dot_index >= 0: + identifier = identifier[:dot_index] return identifier.decode("ascii") From 6829d89399954d68622995ec22849a3e2bdd3852 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Thu, 18 Aug 2022 17:11:42 -0500 Subject: [PATCH 03/35] quick update --- pyensembl/fasta.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyensembl/fasta.py b/pyensembl/fasta.py index 9c724db..0893a81 100644 --- a/pyensembl/fasta.py +++ b/pyensembl/fasta.py @@ -51,10 +51,9 @@ def _parse_header_id(line): # .e.g. # "ENST00000448914.1" instead of "ENST00000448914" # So now we have to parse out the identifier - if identifier.startswith(b"ENS"): - dot_index = identifier.find(b".") - if dot_index >= 0: - identifier = identifier[:dot_index] + dot_index = identifier.find(b".") + if dot_index >= 0: + identifier = identifier[:dot_index] return identifier.decode("ascii") From 2ee9eb2dd62f66b0469bd69e06523d3de8c9f863 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Thu, 18 Aug 2022 17:41:58 -0500 Subject: [PATCH 04/35] quick update --- pyensembl/fasta.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pyensembl/fasta.py b/pyensembl/fasta.py index 0893a81..bf2d813 100644 --- a/pyensembl/fasta.py +++ b/pyensembl/fasta.py @@ -51,9 +51,13 @@ def _parse_header_id(line): # .e.g. # "ENST00000448914.1" instead of "ENST00000448914" # So now we have to parse out the identifier - dot_index = identifier.find(b".") - if dot_index >= 0: - identifier = identifier[:dot_index] + + # only split name of ENSEMBL naming. In other database, such as TAIR, + # the '.1' notation is the isoform not the version. 
+ if identifier.startswith(b"ENS"): + dot_index = identifier.find(b".") + if dot_index >= 0: + identifier = identifier[:dot_index] return identifier.decode("ascii") From 43f5e1cbe592ec1a229e89bca7b14b36cb5502d6 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Thu, 18 Aug 2022 17:55:49 -0500 Subject: [PATCH 05/35] quick update --- pyensembl/transcript.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyensembl/transcript.py b/pyensembl/transcript.py index 385f07a..19061b4 100644 --- a/pyensembl/transcript.py +++ b/pyensembl/transcript.py @@ -416,6 +416,9 @@ def sequence(self): Spliced cDNA sequence of transcript (includes 5" UTR, coding sequence, and 3" UTR) """ + transcript_id = self.transcript_id + if transcript_id.startswith("ENS"): + transcript_id = transcript_id.rsplit(".", 1)[0] return self.genome.transcript_sequences.get(self.transcript_id.rsplit(".", 1)[0]) @memoized_property From be260ffdc4b32c177144c246e06f2a83e4a41145 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Thu, 18 Aug 2022 17:59:14 -0500 Subject: [PATCH 06/35] quick update --- pyensembl/transcript.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyensembl/transcript.py b/pyensembl/transcript.py index 19061b4..9d30c5c 100644 --- a/pyensembl/transcript.py +++ b/pyensembl/transcript.py @@ -419,7 +419,7 @@ def sequence(self): transcript_id = self.transcript_id if transcript_id.startswith("ENS"): transcript_id = transcript_id.rsplit(".", 1)[0] - return self.genome.transcript_sequences.get(self.transcript_id.rsplit(".", 1)[0]) + return self.genome.transcript_sequences.get(transcript_id) @memoized_property def first_start_codon_spliced_offset(self): From 86968b54c05e932258aa7a9fb2749a097194f151 Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Thu, 28 Dec 2023 16:13:42 -0600 Subject: [PATCH 07/35] add species --- pyensembl/species.py | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/pyensembl/species.py b/pyensembl/species.py index 588c4f7..bc05890 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -100,7 +100,8 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}): for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" % i + "Ensembl release %d already has an associated genome" + % i ) self._release_to_genome[i] = genome_name @@ -113,10 +114,13 @@ def which_reference(self, ensembl_release): return self._release_to_genome[ensembl_release] def __str__(self): - return "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" % ( - self.latin_name, - self.synonyms, - self.reference_assemblies, + return ( + "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" + % ( + self.latin_name, + self.synonyms, + self.reference_assemblies, + ) ) def __eq__(self, other): @@ -304,6 +308,18 @@ def check_species_object(species_name_or_object): reference_assemblies={"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)}, ) +zebrafish = Species.register( + latin_name="danio_rerio", + synonyms=["zebrafish"], + reference_assemblies={ + "ZFISH7": (47, 53), + "Zv8": (54, 59), + "Zv9": (60, 79), + "GRCz10": (80, 91), + "GRCz11": (92, MAX_ENSEMBL_RELEASE), + }, +) + fly = Species.register( latin_name="drosophila_melanogaster", synonyms=["drosophila", "fruit fly", "fly"], @@ -316,10 +332,23 @@ def check_species_object(species_name_or_object): }, ) +nematode = Species.register( + latin_name="caenorhabditis_elegans", + synonyms=["nematode", "C_elegans"], + 
reference_assemblies={ + "WS180": (47, 49), + "WS190": (50, 54), + "WS200": (55, 57), + "WS210": (58, 78), + "WS220": (79, 66), + "WBcel235": (67, MAX_ENSEMBL_RELEASE), + }, +) + yeast = Species.register( latin_name="saccharomyces_cerevisiae", - synonyms=["yeast","budding_yeast"], + synonyms=["yeast", "budding_yeast"], reference_assemblies={ "R64-1-1": (76, MAX_ENSEMBL_RELEASE), }, -) \ No newline at end of file +) From 29c4cce5133ea24e80257fff085dca5e57476119 Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Fri, 29 Dec 2023 12:05:17 -0600 Subject: [PATCH 08/35] format and relase --- README.md | 33 ++--- docs/conf.py | 155 ++++++++++++----------- pyensembl/genome.py | 5 +- pyensembl/species.py | 22 ++-- pyensembl/transcript.py | 167 ++++++++++++------------- pyensembl/version.py | 2 +- test/common.py | 3 + test/data.py | 109 ++++++++-------- test/test_contigs.py | 1 + test/test_download_cache.py | 36 +++--- test/test_ensembl_gtf.py | 1 + test/test_ensembl_object_properties.py | 1 + test/test_exon_id.py | 150 +++++++++++++--------- test/test_exon_object.py | 19 +-- test/test_gene_ids.py | 44 ++++--- test/test_gene_names.py | 34 +++-- test/test_gene_objects.py | 30 +++-- test/test_id_length.py | 18 +-- test/test_locus.py | 5 + test/test_missing_genome_sources.py | 44 ++++--- test/test_mouse.py | 26 ++-- test/test_release_versions.py | 15 ++- test/test_search.py | 32 +++-- test/test_sequence_data.py | 35 +++--- test/test_serialization.py | 7 +- test/test_string_representation.py | 21 +++- test/test_timings.py | 26 +++- test/test_transcript_ids.py | 41 +++--- test/test_transcript_objects.py | 121 +++++++++++------- test/test_transcript_sequences.py | 1 + test/test_transcript_support_level.py | 9 +- test/test_ucsc_gtf.py | 72 +++++------ 32 files changed, 732 insertions(+), 553 deletions(-) diff --git a/README.md b/README.md index 624d036..d1445fa 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,9 @@ PyPI +# PyEnsembl -PyEnsembl -======= -PyEnsembl is a Python interface to [Ensembl](http://www.ensembl.org) reference genome metadata such as exons and transcripts. PyEnsembl downloads [GTF](https://en.wikipedia.org/wiki/Gene_transfer_format) and [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files from the [Ensembl FTP server](ftp://ftp.ensembl.org) and loads them into a local database. PyEnsembl can also work with custom reference data specified using user-supplied GTF and FASTA files. +PyEnsembl is a Python interface to [Ensembl](http://www.ensembl.org) reference genome metadata such as exons and transcripts. PyEnsembl downloads [GTF](https://en.wikipedia.org/wiki/Gene_transfer_format) and [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files from the [Ensembl FTP server](ftp://ftp.ensembl.org) and loads them into a local database. PyEnsembl can also work with custom reference data specified using user-supplied GTF and FASTA files. # Example Usage @@ -25,7 +24,7 @@ data = EnsemblRelease(77) gene_names = data.gene_names_at_locus(contig=6, position=29945884) # get all exons associated with HLA-A -exon_ids = data.exon_ids_of_gene_name('HLA-A') +exon_ids = data.exon_ids_of_gene_name("HLA-A") ``` # Installation @@ -52,6 +51,7 @@ Alternatively, you can create the `EnsemblRelease` object from inside a Python process and call `ensembl_object.download()` followed by `ensembl_object.index()`. ## Cache Location + By default, PyEnsembl uses the platform-specific `Cache` folder and caches the files into the `pyensembl` sub-directory. 
You can override this default by setting the environment key `PYENSEMBL_CACHE_DIR` @@ -66,11 +66,11 @@ or ```python import os -os.environ['PYENSEMBL_CACHE_DIR'] = '/custom/cache/dir' +os.environ["PYENSEMBL_CACHE_DIR"] = "/custom/cache/dir" # ... PyEnsembl API usage ``` -# Usage tips +# Usage tips ## List installed genomes @@ -80,6 +80,7 @@ pyensembl list ```python from pyensembl.shell import collect_all_installed_ensembl_releases + collect_all_installed_ensembl_releases() ``` @@ -87,10 +88,11 @@ collect_all_installed_ensembl_releases() ```python from pyensembl import EnsemblRelease + data = EnsemblRelease( release=100, - species=find_species_by_name('drosophila_melanogaster'), - ) + species=find_species_by_name("drosophila_melanogaster"), +) ``` ## Data structure @@ -98,13 +100,13 @@ data = EnsemblRelease( ### Gene object ```python -gene=data.gene_by_id(gene_id='FBgn0011747') +gene = data.gene_by_id(gene_id="FBgn0011747") ``` ### Transcript object ```python -transcript=gene.transcripts[0] +transcript = gene.transcripts[0] ``` ### Protein information @@ -125,11 +127,12 @@ For example: ```python from pyensembl import Genome + data = Genome( - reference_name='GRCh38', - annotation_name='my_genome_features', + reference_name="GRCh38", + annotation_name="my_genome_features", # annotation_version=None, - gtf_path_or_url='/My/local/gtf/path_to_my_genome_features.gtf', # Path or URL of GTF file + gtf_path_or_url="/My/local/gtf/path_to_my_genome_features.gtf", # Path or URL of GTF file # transcript_fasta_paths_or_urls=None, # List of paths or URLs of FASTA files containing transcript sequences # protein_fasta_paths_or_urls=None, # List of paths or URLs of FASTA files containing protein sequences # cache_directory_path=None, # Where to place downloaded and cached files for this genome @@ -142,8 +145,8 @@ gene_names = data.gene_names_at_locus(contig=6, position=29945884) # API The `EnsemblRelease` object has methods to let you access all possible -combinations of the annotation features *gene\_name*, *gene\_id*, -*transcript\_name*, *transcript\_id*, *exon\_id* as well as the location of +combinations of the annotation features _gene_name_, _gene_id_, +_transcript_name_, _transcript_id_, _exon_id_ as well as the location of these genomic elements (contig, start position, end position, strand). ## Genes diff --git a/docs/conf.py b/docs/conf.py index bbc0aaf..1c4034e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,47 +18,47 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath('..')) +sys.path.insert(0, os.path.abspath("..")) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', + "sphinx.ext.autodoc", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. 
-#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'pyensembl' -copyright = u'2016, Hammer Lab' -author = u'Hammer Lab' +project = "pyensembl" +copyright = "2016, Hammer Lab" +author = "Hammer Lab" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = u'0.8.10' +version = "0.8.10" # The full version, including alpha/beta/rc tags. -release = u'0.8.10' +release = "0.8.10" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -69,37 +69,37 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -109,156 +109,149 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = 'alabaster' +html_theme = "alabaster" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'pyensembldoc' +htmlhelp_basename = "pyensembldoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. 
+ #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'pyensembl.tex', u'pyensembl Documentation', - u'Hammer Lab', 'manual'), + (master_doc, "pyensembl.tex", "pyensembl Documentation", "Hammer Lab", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'pyensembl', u'pyensembl Documentation', - [author], 1) -] +man_pages = [(master_doc, "pyensembl", "pyensembl Documentation", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -267,19 +260,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'pyensembl', u'pyensembl Documentation', - author, 'pyensembl', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "pyensembl", + "pyensembl Documentation", + author, + "pyensembl", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. 
-#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/pyensembl/genome.py b/pyensembl/genome.py index 05b6efc..5345742 100644 --- a/pyensembl/genome.py +++ b/pyensembl/genome.py @@ -291,8 +291,9 @@ def db(self): # make sure GTF file exists locally # and populate self.gtf_path self._set_local_paths( - download_if_missing=True, ## if set at False the files are not downloaded in interactive python, works anyways via command line though - overwrite=False) + download_if_missing=True, ## if set at False the files are not downloaded in interactive python, works anyways via command line though + overwrite=False, + ) if self.gtf_path is None: raise ValueError("Property 'gtf_path' of %s cannot be None" % self) diff --git a/pyensembl/species.py b/pyensembl/species.py index bc05890..c19f359 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -100,8 +100,7 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}): for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" - % i + "Ensembl release %d already has an associated genome" % i ) self._release_to_genome[i] = genome_name @@ -114,13 +113,10 @@ def which_reference(self, ensembl_release): return self._release_to_genome[ensembl_release] def __str__(self): - return ( - "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" - % ( - self.latin_name, - self.synonyms, - self.reference_assemblies, - ) + return "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" % ( + self.latin_name, + self.synonyms, + self.reference_assemblies, ) def __eq__(self, other): @@ -312,7 +308,7 @@ def check_species_object(species_name_or_object): latin_name="danio_rerio", synonyms=["zebrafish"], reference_assemblies={ - "ZFISH7": (47, 53), + # "ZFISH7": (47, 53), "Zv8": (54, 59), "Zv9": (60, 79), "GRCz10": (80, 91), @@ -336,8 +332,8 @@ def check_species_object(species_name_or_object): latin_name="caenorhabditis_elegans", synonyms=["nematode", "C_elegans"], reference_assemblies={ - "WS180": (47, 49), - "WS190": (50, 54), + # "WS180": (47, 49), + # "WS190": (50, 54), "WS200": (55, 57), "WS210": (58, 78), "WS220": (79, 66), @@ -349,6 +345,6 @@ def check_species_object(species_name_or_object): latin_name="saccharomyces_cerevisiae", synonyms=["yeast", "budding_yeast"], reference_assemblies={ - "R64-1-1": (76, MAX_ENSEMBL_RELEASE), + "R64-1-1": (75, MAX_ENSEMBL_RELEASE), }, ) diff --git a/pyensembl/transcript.py b/pyensembl/transcript.py index 9d30c5c..012a152 100644 --- a/pyensembl/transcript.py +++ b/pyensembl/transcript.py @@ -24,18 +24,20 @@ class Transcript(LocusWithGenome): and not using the sequence, avoid the memory/performance overhead of fetching and storing sequences from a FASTA file. 
""" + def __init__( - self, - transcript_id, - transcript_name, - contig, - start, - end, - strand, - biotype, - gene_id, - genome, - support_level=None): + self, + transcript_id, + transcript_name, + contig, + start, + end, + strand, + biotype, + gene_id, + genome, + support_level=None, + ): LocusWithGenome.__init__( self, contig=contig, @@ -43,7 +45,8 @@ def __init__( end=end, strand=strand, biotype=biotype, - genome=genome) + genome=genome, + ) self.transcript_id = transcript_id self.transcript_name = transcript_name self.gene_id = gene_id @@ -71,16 +74,18 @@ def __str__(self): " biotype='%s'," " contig='%s'," " start=%d," - " end=%d, strand='%s', genome='%s')") % ( - self.transcript_id, - self.name, - self.gene_id, - self.biotype, - self.contig, - self.start, - self.end, - self.strand, - self.genome.reference_name) + " end=%d, strand='%s', genome='%s')" + ) % ( + self.transcript_id, + self.name, + self.gene_id, + self.biotype, + self.contig, + self.start, + self.end, + self.strand, + self.genome.reference_name, + ) def __len__(self): """ @@ -90,9 +95,10 @@ def __len__(self): def __eq__(self, other): return ( - other.__class__ is Transcript and - self.id == other.id and - self.genome == other.genome) + other.__class__ is Transcript + and self.id == other.id + and self.genome == other.genome + ) def __hash__(self): return hash(self.id) @@ -120,10 +126,8 @@ def exons(self): # in each transcript columns = ["exon_number", "exon_id"] exon_numbers_and_ids = self.db.query( - columns, - filter_column="transcript_id", - filter_value=self.id, - feature="exon") + columns, filter_column="transcript_id", filter_value=self.id, feature="exon" + ) # fill this list in its correct order (by exon_number) by using # the exon_number as a 1-based list offset @@ -133,15 +137,16 @@ def exons(self): exon = self.genome.exon_by_id(exon_id) if exon is None: raise ValueError( - "Missing exon %s for transcript %s" % ( - exon_number, self.id)) + "Missing exon %s for transcript %s" % (exon_number, self.id) + ) exon_number = int(exon_number) if exon_number < 1: raise ValueError("Invalid exon number: %s" % exon_number) elif exon_number > len(exons): raise ValueError( - "Invalid exon number: %s (max expected = %d)" % ( - exon_number, len(exons))) + "Invalid exon number: %s (max expected = %d)" + % (exon_number, len(exons)) + ) # exon_number is 1-based, convert to list index by subtracting 1 exon_idx = exon_number - 1 @@ -164,12 +169,13 @@ def _transcript_feature_position_ranges(self, feature, required=True): select_column_names=["start", "end"], filter_column="transcript_id", filter_value=self.id, - feature=feature) + feature=feature, + ) if required and len(results) == 0: raise ValueError( - "Transcript %s does not contain feature %s" % ( - self.id, feature)) + "Transcript %s does not contain feature %s" % (self.id, feature) + ) return results @memoize @@ -177,20 +183,20 @@ def _transcript_feature_positions(self, feature): """ Get unique positions for feature, raise an error if feature is absent. """ - ranges = self._transcript_feature_position_ranges( - feature, required=True) + ranges = self._transcript_feature_position_ranges(feature, required=True) results = [] # a feature (such as a stop codon), maybe be split over multiple # contiguous ranges. Collect all the nucleotide positions into a # single list. 
- for (start, end) in ranges: + for start, end in ranges: # since ranges are [inclusive, inclusive] and # Python ranges are [inclusive, exclusive) we have to increment # the end position for position in range(start, end + 1): if position in results: raise ValueError( - "Repeated position %d for %s" % (position, feature)) + "Repeated position %d for %s" % (position, feature) + ) results.append(position) return results @@ -207,10 +213,9 @@ def _codon_positions(self, feature): results = self._transcript_feature_positions(feature) if len(results) != 3: raise ValueError( - "Expected 3 positions for %s of %s but got %d" % ( - feature, - self.id, - len(results))) + "Expected 3 positions for %s of %s but got %d" + % (feature, self.id, len(results)) + ) return results @memoized_property @@ -219,7 +224,8 @@ def contains_start_codon(self): Does this transcript have an annotated start_codon entry? """ start_codons = self._transcript_feature_position_ranges( - "start_codon", required=False) + "start_codon", required=False + ) return len(start_codons) > 0 @memoized_property @@ -228,9 +234,10 @@ def contains_stop_codon(self): Does this transcript have an annotated stop_codon entry? """ stop_codons = self._transcript_feature_position_ranges( - "stop_codon", required=False) + "stop_codon", required=False + ) return len(stop_codons) > 0 - + @memoized_property def start_codon_complete(self): """ @@ -266,9 +273,10 @@ def exon_intervals(self): select_column_names=["exon_number", "start", "end"], filter_column="transcript_id", filter_value=self.id, - feature="exon") + feature="exon", + ) sorted_intervals = [None] * len(results) - for (exon_number, start, end) in results: + for exon_number, start, end in results: sorted_intervals[int(exon_number) - 1] = (start, end) return sorted_intervals @@ -281,15 +289,15 @@ def spliced_offset(self, position): """ if type(position) is not int: raise TypeError( - "Position argument must be an integer, got %s : %s" % ( - position, type(position))) + "Position argument must be an integer, got %s : %s" + % (position, type(position)) + ) if position < self.start or position > self.end: raise ValueError( - "Invalid position: %d (must be between %d and %d)" % ( - position, - self.start, - self.end)) + "Invalid position: %d (must be between %d and %d)" + % (position, self.start, self.end) + ) # offset from beginning of unspliced transcript (including introns) unspliced_offset = self.offset(position) @@ -306,7 +314,8 @@ def spliced_offset(self, position): # Intron vs. Exon: ...iiiiiieeeeeeiiiiiiiiiiiiiiiieeeeeeiiiiiiiiiii... for exon in self.exons: exon_unspliced_start, exon_unspliced_end = self.offset_range( - exon.start, exon.end) + exon.start, exon.end + ) # If the relative position is not within this exon, keep a running # total of the total exonic length-so-far. # @@ -323,8 +332,8 @@ def spliced_offset(self, position): exon_length = len(exon) # exon_end_position - exon_start_position + 1 total_spliced_offset += exon_length raise ValueError( - "Couldn't find position %d on any exon of %s" % ( - position, self.id)) + "Couldn't find position %d on any exon of %s" % (position, self.id) + ) @memoized_property def start_codon_unspliced_offsets(self): @@ -332,11 +341,7 @@ def start_codon_unspliced_offsets(self): Offsets from start of unspliced pre-mRNA transcript of nucleotides in start codon. 
""" - return [ - self.offset(position) - for position - in self.start_codon_positions - ] + return [self.offset(position) for position in self.start_codon_positions] @memoized_property def stop_codon_unspliced_offsets(self): @@ -344,11 +349,7 @@ def stop_codon_unspliced_offsets(self): Offsets from start of unspliced pre-mRNA transcript of nucleotides in stop codon. """ - return [ - self.offset(position) - for position - in self.stop_codon_positions - ] + return [self.offset(position) for position in self.stop_codon_positions] def _contiguous_offsets(self, offsets): """ @@ -358,8 +359,7 @@ def _contiguous_offsets(self, offsets): offsets.sort() for i in range(len(offsets) - 1): if offsets[i] + 1 != offsets[i + 1]: - raise ValueError( - "Offsets not contiguous: %s" % (offsets,)) + raise ValueError("Offsets not contiguous: %s" % (offsets,)) return offsets @memoized_property @@ -369,9 +369,7 @@ def start_codon_spliced_offsets(self): of nucleotides in start codon. """ offsets = [ - self.spliced_offset(position) - for position - in self.start_codon_positions + self.spliced_offset(position) for position in self.start_codon_positions ] return self._contiguous_offsets(offsets) @@ -382,9 +380,7 @@ def stop_codon_spliced_offsets(self): of nucleotides in stop codon. """ offsets = [ - self.spliced_offset(position) - for position - in self.stop_codon_positions + self.spliced_offset(position) for position in self.stop_codon_positions ] return self._contiguous_offsets(offsets) @@ -403,11 +399,11 @@ def complete(self): a coding sequence whose length is divisible by 3 """ return ( - self.contains_start_codon and - self.start_codon_complete and - self.contains_stop_codon and - self.coding_sequence is not None and - len(self.coding_sequence) % 3 == 0 + self.contains_start_codon + and self.start_codon_complete + and self.contains_stop_codon + and self.coding_sequence is not None + and len(self.coding_sequence) % 3 == 0 ) @memoized_property @@ -459,7 +455,7 @@ def coding_sequence(self): # pylint: disable=invalid-slice-index # TODO(tavi) Figure out pylint is not happy with this slice - return self.sequence[start:end + 1] + return self.sequence[start : end + 1] @memoized_property def five_prime_utr_sequence(self): @@ -469,7 +465,7 @@ def five_prime_utr_sequence(self): """ # pylint: disable=invalid-slice-index # TODO(tavi) Figure out pylint is not happy with this slice - return self.sequence[:self.first_start_codon_spliced_offset] + return self.sequence[: self.first_start_codon_spliced_offset] @memoized_property def three_prime_utr_sequence(self): @@ -477,7 +473,7 @@ def three_prime_utr_sequence(self): cDNA sequence of 3' UTR (untranslated region at the end of the transcript) """ - return self.sequence[self.last_stop_codon_spliced_offset + 1:] + return self.sequence[self.last_stop_codon_spliced_offset + 1 :] @memoized_property def protein_id(self): @@ -487,7 +483,8 @@ def protein_id(self): filter_value=self.id, feature="CDS", distinct=True, - required=False) + required=False, + ) if result_tuple: return result_tuple[0] else: diff --git a/pyensembl/version.py b/pyensembl/version.py index 73b4b05..519574c 100644 --- a/pyensembl/version.py +++ b/pyensembl/version.py @@ -1 +1 @@ -__version__ = "2.2.9" +__version__ = "2.2.10" diff --git a/test/common.py b/test/common.py index 9b20c3b..094b6a2 100644 --- a/test/common.py +++ b/test/common.py @@ -14,6 +14,7 @@ contigs = [str(c) for c in range(1, 23)] + ["X", "Y", "M"] + @nottest def test_ensembl_releases(*versions): """ @@ -33,7 +34,9 @@ def decorator(test_fn): def 
new_test_fn(): for ensembl in ensembl_releases: test_fn(ensembl) + return new_test_fn + return decorator diff --git a/test/data.py b/test/data.py index 0b41369..60cd08a 100644 --- a/test/data.py +++ b/test/data.py @@ -21,25 +21,29 @@ def data_path(name): CTNNBIP1_004_transcript_id = "ENST00000377256" # coding sequence for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_CDS = "".join([ - "ATG", - "AACCGCGAGGGAGCTCCCGGGAAGAGTCCGGAG", - "GAGATGTACATTCAGCAGAAGGTCCGAGTGCTGCTCATGCTGCGGAAGATGGGATCAAAC", - "CTGACAGCCAGCGAGGAGGAGTTCCTGCGCACCTATGCAGGGGTGGTCAACAGCCAGCTC", - "AGCCAGCTGCCTCCGCACTCCATCGACCAGG", - "GTGCAGAGGACGTGGTGATGGCGTTTTCCAGGTCGGAGACGGAAGACCGGAGGCAG", - "TAG" -]) +CTNNBIP1_004_CDS = "".join( + [ + "ATG", + "AACCGCGAGGGAGCTCCCGGGAAGAGTCCGGAG", + "GAGATGTACATTCAGCAGAAGGTCCGAGTGCTGCTCATGCTGCGGAAGATGGGATCAAAC", + "CTGACAGCCAGCGAGGAGGAGTTCCTGCGCACCTATGCAGGGGTGGTCAACAGCCAGCTC", + "AGCCAGCTGCCTCCGCACTCCATCGACCAGG", + "GTGCAGAGGACGTGGTGATGGCGTTTTCCAGGTCGGAGACGGAAGACCGGAGGCAG", + "TAG", + ] +) # 5' UTR for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_UTR5 = "".join([ - "TGTGGGTGCAGGTTTCCTGGGCTTGCCAGACACACAGGGCGGCACCTTCCTACTTCTGCC", - "CAGCCACAGCCCTCCCCTCACAGTTGAGCACCTGTTTGCCTGAAGTTAATTTCCAGAAGC", - "AGGAGTCCCCAGAGCCAGGCAGGGGG"]) +CTNNBIP1_004_UTR5 = "".join( + [ + "TGTGGGTGCAGGTTTCCTGGGCTTGCCAGACACACAGGGCGGCACCTTCCTACTTCTGCC", + "CAGCCACAGCCCTCCCCTCACAGTTGAGCACCTGTTTGCCTGAAGTTAATTTCCAGAAGC", + "AGGAGTCCCCAGAGCCAGGCAGGGGG", + ] +) # 3' UTR for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_UTR3 = \ - "CTGCAAAGCCCTTGGAACACCCTGGATGCTGTTGAGGGCCAAGAGATCTGTGTGGCTCC" +CTNNBIP1_004_UTR3 = "CTGCAAAGCCCTTGGAACACCCTGGATGCTGTTGAGGGCCAAGAGATCTGTGTGGCTCC" CTNNBIP1_004_locus = Locus("1", 9850659, 9878176, "-") @@ -47,20 +51,14 @@ def data_path(name): # http://useast.ensembl.org/Homo_sapiens/Transcript/Exons?g=ENSG00000178585; # r=1:9850659-9878176;redirect=no;t=ENST00000377256 CTTNNIP1_004_exon_ids = [ - 'ENSE00001473268', - 'ENSE00001643659', - 'ENSE00001600669', - 'ENSE00001267940', - 'ENSE00001473265', + "ENSE00001473268", + "ENSE00001643659", + "ENSE00001600669", + "ENSE00001267940", + "ENSE00001473265", ] -CTTNNIP1_004_exon_lengths = [ - 37, - 85, - 120, - 91, - 118 -] +CTTNNIP1_004_exon_lengths = [37, 85, 120, 91, 118] # @@ -72,26 +70,28 @@ def data_path(name): EGFR_001_transcript_id = "ENST00000275493" EGFR_001_ccds_id = "CCDS5514" EGFR_001_protein_id = "ENSP00000275493" -EGFR_001_protein_sequence = "".join([ - "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYV", - "QRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNL", - "QEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKL", - "TKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVN", - "PEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLS", - "INATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAF", - "ENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKI", - "ISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHP", - "ECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTG", - "PGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPN", - "QALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVD", - "NPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAA", - "RNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTF", - "GSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLV", - 
"IQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACI", - "DRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPS" - "RDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGS" - "TAENAEYLRVAPQSSEFIGA" -]) +EGFR_001_protein_sequence = "".join( + [ + "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYV", + "QRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNL", + "QEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKL", + "TKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVN", + "PEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLS", + "INATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAF", + "ENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKI", + "ISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHP", + "ECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTG", + "PGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPN", + "QALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVD", + "NPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAA", + "RNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTF", + "GSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLV", + "IQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACI", + "DRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPS" + "RDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGS" + "TAENAEYLRVAPQSSEFIGA", + ] +) # GTF cropped from ftp://ftp.ensembl.org/pub/release-81/gtf/mus_musculus/ @@ -114,13 +114,17 @@ def data_path(name): # http://useast.ensembl.org/Mus_musculus/Gene/Summary?db=core;g=ENSMUSG00000017167 MOUSE_ENSMUSG00000017167_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.gtf") + "mouse.ensembl.81.partial.ENSMUSG00000017167.gtf" +) MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.fa") + "mouse.ensembl.81.partial.ENSMUSG00000017167.fa" +) MOUSE_ENSMUSG00000088969_NCRNA_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa") + "mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa" +) MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.pep") + "mouse.ensembl.81.partial.ENSMUSG00000017167.pep" +) custom_mouse_genome_grcm38_subset = Genome( @@ -128,7 +132,8 @@ def data_path(name): annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], - protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH]) + protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], +) def setup_init_custom_mouse_genome(): diff --git a/test/test_contigs.py b/test/test_contigs.py index 1101061..b4eb702 100644 --- a/test/test_contigs.py +++ b/test/test_contigs.py @@ -2,6 +2,7 @@ grch38 = genome_for_reference_name("GRCh38") + def test_contig_names(): contig_names = set(grch38.contigs()) for chrom in list(range(1, 23)) + ["X", "Y", "MT"]: diff --git a/test/test_download_cache.py b/test/test_download_cache.py index 03c7da6..2bf5913 100644 --- a/test/test_download_cache.py +++ b/test/test_download_cache.py @@ -1,9 +1,5 @@ from nose.tools import assert_raises, ok_ -from pyensembl.download_cache import ( - DownloadCache, - MissingLocalFile, - 
MissingRemoteFile -) +from pyensembl.download_cache import DownloadCache, MissingLocalFile, MissingRemoteFile import os import tempfile @@ -13,21 +9,27 @@ download_cache = DownloadCache( reference_name="__test_reference", annotation_name="__test_annotation", - copy_local_files_to_cache=False) + copy_local_files_to_cache=False, +) + def test_download_cache_missing_local_file(): # clear the cache download_cache.delete_cache_directory() with assert_raises(MissingLocalFile): download_cache.download_or_copy_if_necessary( - path_or_url="test_file_doesn_not_exist.file") + path_or_url="test_file_doesn_not_exist.file" + ) + def test_download_cache_missing_remote_file(): # clear the cache download_cache.delete_cache_directory() with assert_raises(MissingRemoteFile): download_cache.download_or_copy_if_necessary( - path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL") + path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL" + ) + def test_download_cache_custom_location(): test_file = "refseq.ucsc.small.gtf" @@ -36,29 +38,27 @@ def test_download_cache_custom_location(): print("DIR: %s" % tmp_dir) assert tmp_dir is not None - os.environ['PYENSEMBL_CACHE_DIR'] = tmp_dir + os.environ["PYENSEMBL_CACHE_DIR"] = tmp_dir # We need another instance of DownloadCache # that copies files over to cache folder download_cache = DownloadCache( reference_name="test_reference", annotation_name="test_annotation", - copy_local_files_to_cache=True) + copy_local_files_to_cache=True, + ) # clean up download_cache.delete_cache_directory() download_cache.download_or_copy_if_necessary( - download_if_missing=True, - path_or_url=data_path(test_file)) + download_if_missing=True, path_or_url=data_path(test_file) + ) full_path = os.path.join( - tmp_dir, - "pyensembl", - "test_reference", - "test_annotation", - test_file) + tmp_dir, "pyensembl", "test_reference", "test_annotation", test_file + ) print("FULL PATH: %s" % full_path) assert len(full_path) > 0 ok_(os.path.exists(full_path)) - del os.environ['PYENSEMBL_CACHE_DIR'] + del os.environ["PYENSEMBL_CACHE_DIR"] diff --git a/test/test_ensembl_gtf.py b/test/test_ensembl_gtf.py index 040023b..c22cf74 100644 --- a/test/test_ensembl_gtf.py +++ b/test/test_ensembl_gtf.py @@ -4,6 +4,7 @@ from .common import test_ensembl_releases + @test_ensembl_releases() def gtf_path_endswith_gtf_gz(ensembl): path = ensembl.gtf.gtf_path diff --git a/test/test_ensembl_object_properties.py b/test/test_ensembl_object_properties.py index ff90dcf..b3c4582 100644 --- a/test/test_ensembl_object_properties.py +++ b/test/test_ensembl_object_properties.py @@ -8,6 +8,7 @@ from nose.tools import eq_ from pyensembl import EnsemblRelease, MAX_ENSEMBL_RELEASE + def test_human_reference_name(): eq_(EnsemblRelease(release=54).reference_name, "NCBI36") eq_(EnsemblRelease(release=74).reference_name, "GRCh37") diff --git a/test/test_exon_id.py b/test/test_exon_id.py index ceb145f..18590f8 100644 --- a/test/test_exon_id.py +++ b/test/test_exon_id.py @@ -10,71 +10,104 @@ # all exons associated with TP53 gene in Ensembl release 77 TP53_EXON_IDS_RELEASE_77 = [ - 'ENSE00002337729', 'ENSE00002419584', - 'ENSE00003625790', 'ENSE00003518480', - 'ENSE00003723991', 'ENSE00003712342', - 'ENSE00001657961', 'ENSE00003725258', - 'ENSE00003740946', 'ENSE00002204316', - 'ENSE00002064269', 'ENSE00003750554', - 'ENSE00003634848', 'ENSE00003492844', - 'ENSE00003735852', 'ENSE00003545950', - 'ENSE00003605891', 'ENSE00002051192', - 'ENSE00002084733', 'ENSE00003726882', - 'ENSE00001146308', 'ENSE00002667911', - 'ENSE00003752869', 'ENSE00003739898', - 
'ENSE00003753508', 'ENSE00002034209', - 'ENSE00002030826', 'ENSE00001596491', - 'ENSE00002037735', 'ENSE00003736616', - 'ENSE00002672443', 'ENSE00002226620', - 'ENSE00003715195', 'ENSE00003750794', - 'ENSE00003745267', 'ENSE00003746220', - 'ENSE00003656695', 'ENSE00003669712', - 'ENSE00002051873', 'ENSE00002048269', - 'ENSE00002670535', 'ENSE00002677565', - 'ENSE00003532881', 'ENSE00003520683', - 'ENSE00002076714', 'ENSE00002062958', - 'ENSE00002073243', 'ENSE00003670707', - 'ENSE00002065802', 'ENSE00002362269' + "ENSE00002337729", + "ENSE00002419584", + "ENSE00003625790", + "ENSE00003518480", + "ENSE00003723991", + "ENSE00003712342", + "ENSE00001657961", + "ENSE00003725258", + "ENSE00003740946", + "ENSE00002204316", + "ENSE00002064269", + "ENSE00003750554", + "ENSE00003634848", + "ENSE00003492844", + "ENSE00003735852", + "ENSE00003545950", + "ENSE00003605891", + "ENSE00002051192", + "ENSE00002084733", + "ENSE00003726882", + "ENSE00001146308", + "ENSE00002667911", + "ENSE00003752869", + "ENSE00003739898", + "ENSE00003753508", + "ENSE00002034209", + "ENSE00002030826", + "ENSE00001596491", + "ENSE00002037735", + "ENSE00003736616", + "ENSE00002672443", + "ENSE00002226620", + "ENSE00003715195", + "ENSE00003750794", + "ENSE00003745267", + "ENSE00003746220", + "ENSE00003656695", + "ENSE00003669712", + "ENSE00002051873", + "ENSE00002048269", + "ENSE00002670535", + "ENSE00002677565", + "ENSE00003532881", + "ENSE00003520683", + "ENSE00002076714", + "ENSE00002062958", + "ENSE00002073243", + "ENSE00003670707", + "ENSE00002065802", + "ENSE00002362269", ] + def test_exon_ids_of_gene_id(): """ test_exon_ids_of_gene_id: Ensure that gene_id ENSG00000141510 (name=TP53), has all the same exon IDs found on the Ensembl website. """ - exon_ids = ensembl.exon_ids_of_gene_id('ENSG00000141510') - assert len(exon_ids) == len(TP53_EXON_IDS_RELEASE_77), \ - "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( - len(TP53_EXON_IDS_RELEASE_77), - len(exon_ids), - len(set(exon_ids))) + exon_ids = ensembl.exon_ids_of_gene_id("ENSG00000141510") + assert len(exon_ids) == len( + TP53_EXON_IDS_RELEASE_77 + ), "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( + len(TP53_EXON_IDS_RELEASE_77), + len(exon_ids), + len(set(exon_ids)), + ) assert all(exon_id in TP53_EXON_IDS_RELEASE_77 for exon_id in exon_ids) + def test_exon_ids_of_gene_name(): """ test_exon_ids_of_gene_name: Ensure that TP53 has the same exon IDs found on the Ensembl website. 
""" exon_ids = ensembl.exon_ids_of_gene_name("TP53") - assert len(exon_ids) == len(TP53_EXON_IDS_RELEASE_77), \ - "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( - len(TP53_EXON_IDS_RELEASE_77), - len(exon_ids), - len(set(exon_ids))) + assert len(exon_ids) == len( + TP53_EXON_IDS_RELEASE_77 + ), "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( + len(TP53_EXON_IDS_RELEASE_77), + len(exon_ids), + len(set(exon_ids)), + ) assert all(exon_id in TP53_EXON_IDS_RELEASE_77 for exon_id in exon_ids) + # Exon IDs of transcript TP53-026 TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 = [ - 'ENSE00002064269', - 'ENSE00003723991', - 'ENSE00003712342', - 'ENSE00003725258', - 'ENSE00003740946', - 'ENSE00003750554', - 'ENSE00003634848', - 'ENSE00003492844' + "ENSE00002064269", + "ENSE00003723991", + "ENSE00003712342", + "ENSE00003725258", + "ENSE00003740946", + "ENSE00003750554", + "ENSE00003634848", + "ENSE00003492844", ] + def test_exon_ids_of_transcript_name(): """ test_exon_ids_of_transcript_name : Look up exon IDs of transcript TP53-026 @@ -82,13 +115,16 @@ def test_exon_ids_of_transcript_name(): for release 77 """ exon_ids = ensembl.exon_ids_of_transcript_name("TP53-026") - assert len(exon_ids) == len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), \ - "Expected %d exons, got %d" % ( - len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), - len(exon_ids)) + assert len(exon_ids) == len( + TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 + ), "Expected %d exons, got %d" % ( + len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), + len(exon_ids), + ) assert all( - exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 - for exon_id in exon_ids) + exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 for exon_id in exon_ids + ) + def exon_ids_of_transcript_id(): """ @@ -97,10 +133,12 @@ def exon_ids_of_transcript_id(): what we find on the Ensembl website. """ exon_ids = ensembl.exon_ids_of_transcript_id("ENST00000610623") - assert len(exon_ids) == len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), \ - "Expected %d exons, got %d" % ( - len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), - len(exon_ids)) + assert len(exon_ids) == len( + TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 + ), "Expected %d exons, got %d" % ( + len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), + len(exon_ids), + ) assert all( - exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 - for exon_id in exon_ids) + exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 for exon_id in exon_ids + ) diff --git a/test/test_exon_object.py b/test/test_exon_object.py index 4587284..9a77cde 100644 --- a/test/test_exon_object.py +++ b/test/test_exon_object.py @@ -9,14 +9,14 @@ ensembl = cached_release(77) + def test_exon_object_by_id(): """ test_exon_object_by_id : check properties of exon 4 of CTNNB1 when looked up by ID in Ensembl 77. """ exon = ensembl.exon_by_id("ENSE00003464041") - assert exon.gene_name == "CTNNB1", \ - "Unexpected gene name: %s" % exon.gene_name + assert exon.gene_name == "CTNNB1", "Unexpected gene name: %s" % exon.gene_name assert exon.contig == "3", exon.contig assert exon.strand == "+" assert exon.on_forward_strand @@ -25,14 +25,14 @@ def test_exon_object_by_id(): assert exon.end == 41224753, "Unexpected exon end: %s" % exon.end assert exon.length == len(exon) == 228 + def test_exon_object_by_id_on_negative_strand(): """ test_exon_object_by_id_on_negative_strand : check properties of exon 1 from CXCR3 when looked up by ID in Ensembl 77. 
""" exon = ensembl.exon_by_id("ENSE00001817013") - assert exon.gene_name == "CXCR3", \ - "Unexpected gene name: %s" % exon.gene_name + assert exon.gene_name == "CXCR3", "Unexpected gene name: %s" % exon.gene_name assert exon.contig == "X", exon.contig assert exon.strand == "-" assert exon.on_backward_strand @@ -57,6 +57,7 @@ def test_exon_object_at_locus(): assert exon.start <= 41224526, "Unexpected exon start: %s" % exon.start assert exon.end >= 41224526, "Unexpected exon end: %s" % exon.end + def test_exon_object_at_locus_on_negative_strand(): """ test_exon_object_at_locus : check properties of exon 1 of CXCR3 when looked @@ -72,6 +73,7 @@ def test_exon_object_at_locus_on_negative_strand(): assert exon.start <= 71618517, "Unexpected exon start: %s" % exon.start assert exon.end >= 71618517, "Unexpected exon end: %s" % exon.end + def test_exon_basic_properties_str(): exon = ensembl.exon_by_id("ENSE00001817013") assert isinstance(str(exon), str) @@ -81,11 +83,14 @@ def test_exon_basic_properties_str(): # change this test assert str(exon) == repr(exon), "%s != %s" % (str(exon), repr(exon)) + def test_exon_basic_properties_hash(): exon = ensembl.exon_by_id("ENSE00001817013") - assert isinstance(hash(exon), int), \ - "Hash function returns %s instead of int" % ( - type(hash(exon),)) + assert isinstance(hash(exon), int), "Hash function returns %s instead of int" % ( + type( + hash(exon), + ) + ) assert hash(exon) == hash(exon), "Hash function is non-deterministic!" other_exon = ensembl.exon_by_id("ENSE00003464041") assert exon != other_exon diff --git a/test/test_gene_ids.py b/test/test_gene_ids.py index b121b8b..3f1420e 100644 --- a/test/test_gene_ids.py +++ b/test/test_gene_ids.py @@ -13,6 +13,7 @@ ensembl77 = cached_release(77, "human") + def test_gene_ids_grch38_hla_a(): # chr6:29,945,884 is a position for HLA-A # Gene ID = ENSG00000206503 @@ -21,40 +22,55 @@ def test_gene_ids_grch38_hla_a(): # Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884 ids = ensembl_grch38.gene_ids_at_locus(6, 29945884) expected = "ENSG00000206503" - assert ids == ["ENSG00000206503"], \ - "Expected HLA-A, gene ID = %s, got: %s" % (expected, ids) + assert ids == ["ENSG00000206503"], "Expected HLA-A, gene ID = %s, got: %s" % ( + expected, + ids, + ) + def test_gene_ids_of_gene_name_hla_grch38(): hla_a_gene_ids = ensembl_grch38.gene_ids_of_gene_name("HLA-A") - assert 'ENSG00000206503' in hla_a_gene_ids, hla_a_gene_ids + assert "ENSG00000206503" in hla_a_gene_ids, hla_a_gene_ids hla_b_gene_ids = ensembl_grch38.gene_ids_of_gene_name("HLA-B") - assert 'ENSG00000234745' in hla_b_gene_ids, hla_b_gene_ids + assert "ENSG00000234745" in hla_b_gene_ids, hla_b_gene_ids hla_c_gene_ids = ensembl_grch38.gene_ids_of_gene_name("HLA-C") - assert 'ENSG00000204525' in hla_c_gene_ids, hla_c_gene_ids + assert "ENSG00000204525" in hla_c_gene_ids, hla_c_gene_ids + def test_gene_id_of_protein_id_release77(): gene_id = ensembl77.gene_id_of_protein_id("ENSP00000485677") - ok_('ENSG00000279634', gene_id) + ok_("ENSG00000279634", gene_id) + def test_gene_id_of_invalid_name(): with assert_raises(Exception): - ensembl_grch38.gene_ids_of_gene_name( - "A wonderous pony sees through your soul") + ensembl_grch38.gene_ids_of_gene_name("A wonderous pony sees through your soul") + @test_ensembl_releases() def test_gene_ids_on_contig(ensembl): gene_ids_chr17 = ensembl.gene_ids(contig=17) # gene ID of TP53 tp53 = "ENSG00000141510" - assert tp53 in gene_ids_chr17, \ - "Missing %s from %s on chr17, example IDs: %s (total = %d)" % ( - tp53, ensembl, 
gene_ids_chr17[:5], len(gene_ids_chr17)) + assert ( + tp53 in gene_ids_chr17 + ), "Missing %s from %s on chr17, example IDs: %s (total = %d)" % ( + tp53, + ensembl, + gene_ids_chr17[:5], + len(gene_ids_chr17), + ) # gene ID of SMAD4 gene_ids_chr18 = ensembl.gene_ids(contig=18) smad4 = "ENSG00000141646" - assert smad4 in gene_ids_chr18, \ - "Missing %s from %s on chr18, example result: %s (total = %d)" % ( - smad4, ensembl, gene_ids_chr18[:5], len(gene_ids_chr18)) + assert ( + smad4 in gene_ids_chr18 + ), "Missing %s from %s on chr18, example result: %s (total = %d)" % ( + smad4, + ensembl, + gene_ids_chr18[:5], + len(gene_ids_chr18), + ) diff --git a/test/test_gene_names.py b/test/test_gene_names.py index e6c839c..626537b 100644 --- a/test/test_gene_names.py +++ b/test/test_gene_names.py @@ -17,6 +17,7 @@ "HLA-A", ] + @test_ensembl_releases() def test_all_gene_names(ensembl): """ @@ -26,8 +27,11 @@ def test_all_gene_names(ensembl): gene_names = ensembl.gene_names() print(type(gene_names)) for gene_name in KNOWN_GENE_NAMES: - assert gene_name in gene_names, \ - "Missing gene name %s from %s" % (gene_name, ensembl) + assert gene_name in gene_names, "Missing gene name %s from %s" % ( + gene_name, + ensembl, + ) + def test_gene_names_at_locus_grch38_hla_a(): # chr6:29,945,884 is a position for HLA-A @@ -37,25 +41,31 @@ def test_gene_names_at_locus_grch38_hla_a(): names = grch38.gene_names_at_locus(6, 29945884) assert names == ["HLA-A"], "Expected gene name HLA-A, got: %s" % (names,) + @test_ensembl_releases() def test_gene_names_on_contig(ensembl): gene_names_chr17 = ensembl.gene_names(17) - assert "TP53" in gene_names_chr17, \ - "No TP53 in gene names on chr17 of %s, gene names: %s ... (%d)" % ( - ensembl, list(gene_names_chr17[:4]), len(gene_names_chr17)) + assert ( + "TP53" in gene_names_chr17 + ), "No TP53 in gene names on chr17 of %s, gene names: %s ... (%d)" % ( + ensembl, + list(gene_names_chr17[:4]), + len(gene_names_chr17), + ) gene_names_chr18 = ensembl.gene_names(18) - assert "SMAD4" in gene_names_chr18, \ - "No SMAD4 in gene names on chr18 of %s, gene names: %s ... (%d)" % ( - ensembl, list(gene_names_chr18[:4]), len(gene_names_chr18)) + assert ( + "SMAD4" in gene_names_chr18 + ), "No SMAD4 in gene names on chr18 of %s, gene names: %s ... 
(%d)" % ( + ensembl, + list(gene_names_chr18[:4]), + len(gene_names_chr18), + ) def test_gene_name_of_HLA_gene_id(): gene_ids = grch38.gene_ids_of_gene_name("HLA-A") - gene_names = [ - grch38.gene_name_of_gene_id(gene_id) - for gene_id in gene_ids - ] + gene_names = [grch38.gene_name_of_gene_id(gene_id) for gene_id in gene_ids] unique_gene_names = list(set(gene_names)) assert len(unique_gene_names) == 1, (len(unique_gene_names), unique_gene_names) gene_name = unique_gene_names[0] diff --git a/test/test_gene_objects.py b/test/test_gene_objects.py index 2258f43..63fe006 100644 --- a/test/test_gene_objects.py +++ b/test/test_gene_objects.py @@ -3,27 +3,35 @@ from .common import test_ensembl_releases from .data import TP53_gene_id + @test_ensembl_releases() def test_TP53_gene_object_by_id(genome): # when we look up TP53 by its gene ID, we should get the # correct gene back gene = genome.gene_by_id(TP53_gene_id) - assert gene.name == "TP53", \ - "Incorrect gene name %s for gene ID %s in %s" % ( - gene.name, gene.id, genome) - assert gene.contig == "17", \ - "Incorrect gene contig %s for gene ID %s in %s" % ( - gene.contig, gene.id, genome) + assert gene.name == "TP53", "Incorrect gene name %s for gene ID %s in %s" % ( + gene.name, + gene.id, + genome, + ) + assert gene.contig == "17", "Incorrect gene contig %s for gene ID %s in %s" % ( + gene.contig, + gene.id, + genome, + ) + @test_ensembl_releases() def test_TP53_gene_object_by_name(genome): genes = genome.genes_by_name("TP53") # we should only have one TP53 gene (there aren't any copies) - assert len(genes) == 1, \ - "Expected only one gene with name TP53, got %s" % (genes,) + assert len(genes) == 1, "Expected only one gene with name TP53, got %s" % (genes,) # make sure it has the correct gene ID - assert genes[0].id == TP53_gene_id, \ - "Expected gene to have ID %s, got %s" % (TP53_gene_id, genes[0].id) + assert genes[0].id == TP53_gene_id, "Expected gene to have ID %s, got %s" % ( + TP53_gene_id, + genes[0].id, + ) + @test_ensembl_releases() def test_equal_genes(genome): @@ -34,6 +42,7 @@ def test_equal_genes(genome): assert hash(gene1) == hash(gene2) assert gene1 == gene2 + @test_ensembl_releases() def test_not_equal_genes(genome): gene1 = genome.genes_by_name("MUC1")[0] @@ -41,6 +50,7 @@ def test_not_equal_genes(genome): assert hash(gene1) != hash(gene2) assert gene1 != gene2 + @test_ensembl_releases() def test_BRCA1_protein_coding_biotype(genome): gene = genome.genes_by_name("BRCA1")[0] diff --git a/test/test_id_length.py b/test/test_id_length.py index 7371cd4..cc61869 100644 --- a/test/test_id_length.py +++ b/test/test_id_length.py @@ -2,6 +2,7 @@ from nose.tools import nottest + @nottest def check_id_length(method_name): for release in major_releases: @@ -9,16 +10,19 @@ def check_id_length(method_name): # only load chromosome Y to speed up tests idents = method(contig="Y") assert len(idents) > 0, "No values returned by %s" % method_name - assert all(len(ident) == 15 for ident in idents), \ - "Invalid IDs for %s: %s" % ( - method_name, - [ident for ident in idents if len(ident) != 15]) + assert all(len(ident) == 15 for ident in idents), "Invalid IDs for %s: %s" % ( + method_name, + [ident for ident in idents if len(ident) != 15], + ) + def test_gene_id_length(): - check_id_length('gene_ids') + check_id_length("gene_ids") + def test_transcript_id_length(): - check_id_length('transcript_ids') + check_id_length("transcript_ids") + def test_protein_id_length(): - check_id_length('protein_ids') + check_id_length("protein_ids") diff --git 
a/test/test_locus.py b/test/test_locus.py index a1af6fd..475a018 100644 --- a/test/test_locus.py +++ b/test/test_locus.py @@ -3,6 +3,7 @@ from nose.tools import assert_raises + def test_normalize_chromosome(): assert normalize_chromosome("X") == "X" assert normalize_chromosome("chrX") == "chrX" @@ -38,6 +39,7 @@ def test_normalize_chromosome(): with assert_raises(ValueError): normalize_chromosome(0) + def test_locus_overlaps(): locus = Locus("1", 10, 20, "+") assert locus.overlaps("1", 10, 20, "+") @@ -57,6 +59,7 @@ def test_locus_overlaps(): # wrong strand assert not locus.overlaps("1", 10, 20, "-") + def test_locus_contains(): locus = Locus("1", 10, 20, "+") assert locus.contains("1", 10, 20, "+") @@ -82,6 +85,7 @@ def test_locus_contains(): # wrong strand assert not locus.contains("1", 10, 20, "-") + def test_position_offset(): forward_locus = Locus("1", 10, 20, "+") assert forward_locus.offset(10) == 0 @@ -143,6 +147,7 @@ def test_range_offset(): with assert_raises(ValueError): negative_locus.offset_range(9, 10) + def test_locus_distance(): locus_chr1_10_20_pos = Locus("1", 10, 20, "+") locus_chr1_21_25_pos = Locus("1", 21, 25, "+") diff --git a/test/test_missing_genome_sources.py b/test/test_missing_genome_sources.py index 35a4f41..6069261 100644 --- a/test/test_missing_genome_sources.py +++ b/test/test_missing_genome_sources.py @@ -4,37 +4,39 @@ from .data import data_path MOUSE_ENSMUSG00000017167_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.gtf") + "mouse.ensembl.81.partial.ENSMUSG00000017167.gtf" +) MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.fa") + "mouse.ensembl.81.partial.ENSMUSG00000017167.fa" +) # MOUSE_ENSMUSG00000088969_NCRNA_FASTA_PATH = data_path( # "mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa") MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.pep") + "mouse.ensembl.81.partial.ENSMUSG00000017167.pep" +) + def no_gtf_(cm): - print("Testing for 'GTF' in %s : %s" % ( - type(cm.exception), - cm.exception)) + print("Testing for 'GTF' in %s : %s" % (type(cm.exception), cm.exception)) ok_("GTF" in str(cm.exception)) + def no_transcript_(cm): - print("Testing for 'transcript' in %s : %s" % ( - type(cm.exception), - cm.exception)) + print("Testing for 'transcript' in %s : %s" % (type(cm.exception), cm.exception)) ok_("transcript" in str(cm.exception)) + def no_protein_(cm): - print("Testing for 'protein' in %s : %s" % ( - type(cm.exception), - cm.exception)) + print("Testing for 'protein' in %s : %s" % (type(cm.exception), cm.exception)) ok_("protein" in str(cm.exception)) + def test_transcript_fasta_only(): genome = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", - transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH]) + transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], + ) genome.index() eq_(2, len(genome.transcript_sequences.fasta_dictionary)) @@ -59,11 +61,13 @@ def test_transcript_fasta_only(): genome.protein_sequence("test") no_protein_(cm) + def test_protein_fasta_only(): genome_only_proteins = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", - protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH]) + protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], + ) genome_only_proteins.index() eq_(4, len(genome_only_proteins.protein_sequences.fasta_dictionary)) @@ -76,11 +80,13 
@@ def test_protein_fasta_only(): genome_only_proteins.transcript_sequence("DOES_NOT_EXIST") no_transcript_(cm) + def test_gtf_only(): genome_only_gtf = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", - gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH) + gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, + ) genome_only_gtf.index() eq_(1, len(genome_only_gtf.genes())) @@ -95,12 +101,14 @@ def test_gtf_only(): no_protein_(cm) + def test_gtf_transcript_only(): genome_gtf_with_cdna = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, - transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH]) + transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], + ) genome_gtf_with_cdna.index() eq_(1, len(genome_gtf_with_cdna.genes())) @@ -112,12 +120,14 @@ def test_gtf_transcript_only(): transcript.protein_sequence no_protein_(cm) + def test_gtf_protein_only(): genome_gtf_with_proteins = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, - protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH]) + protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], + ) genome_gtf_with_proteins.index() eq_(1, len(genome_gtf_with_proteins.genes())) diff --git a/test/test_mouse.py b/test/test_mouse.py index 24a0b4a..5ec03b6 100644 --- a/test/test_mouse.py +++ b/test/test_mouse.py @@ -1,9 +1,7 @@ from nose.tools import eq_, with_setup -from .data import ( - custom_mouse_genome_grcm38_subset, - setup_init_custom_mouse_genome -) +from .data import custom_mouse_genome_grcm38_subset, setup_init_custom_mouse_genome + @with_setup(setup=setup_init_custom_mouse_genome) def test_mouse_ENSMUSG00000017167(): @@ -39,9 +37,17 @@ def test_mouse_ENSMUSG00000017167(): ] eq_(len(transcripts_coding_cntnap1), 1) transcript_cntnap1 = transcripts_coding_cntnap1[0] - eq_(transcript_cntnap1.sequence[:120], - ("GAGAGAAGGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA" - "GAGAGAGAGAGATTGGGGGTAGGAGAGAGGGAAGGGTGGATAAGGACGGAAAAAAGCTTT")) - eq_(transcript_cntnap1.protein_sequence[:120], - ("MMSLRLFSILLATVVSGAWGWGYYGCNEELVGPLYARSLGASSYYGLFTTARFARLHGIS" - "GWSPRIGDPNPWLQIDLMKKHRIRAVATQGAFNSWDWVTRYMLLYGDRVDSWTPFYQKGH")) + eq_( + transcript_cntnap1.sequence[:120], + ( + "GAGAGAAGGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGA" + "GAGAGAGAGAGATTGGGGGTAGGAGAGAGGGAAGGGTGGATAAGGACGGAAAAAAGCTTT" + ), + ) + eq_( + transcript_cntnap1.protein_sequence[:120], + ( + "MMSLRLFSILLATVVSGAWGWGYYGCNEELVGPLYARSLGASSYYGLFTTARFARLHGIS" + "GWSPRIGDPNPWLQIDLMKKHRIRAVATQGAFNSWDWVTRYMLLYGDRVDSWTPFYQKGH" + ), + ) diff --git a/test/test_release_versions.py b/test/test_release_versions.py index 4eca06f..42761bd 100644 --- a/test/test_release_versions.py +++ b/test/test_release_versions.py @@ -7,29 +7,36 @@ def test_version_too_old_1(): EnsemblRelease(1) + @raises(Exception) def test_version_too_old_47(): EnsemblRelease(47) + @raises(Exception) def test_version_is_not_numeric(): EnsemblRelease("wuzzle") + @raises(Exception) def test_version_is_none(): EnsemblRelease(None) + def test_max_ensembl_release(): - assert isinstance(MAX_ENSEMBL_RELEASE, int), \ - "Unexpected type for MAX_ENSEMBL_RELEASE: %s" % ( - type(MAX_ENSEMBL_RELEASE),) - assert 83 <= MAX_ENSEMBL_RELEASE < 1000, \ + assert isinstance( + MAX_ENSEMBL_RELEASE, int + ), "Unexpected type for MAX_ENSEMBL_RELEASE: %s" % (type(MAX_ENSEMBL_RELEASE),) + 
assert 83 <= MAX_ENSEMBL_RELEASE < 1000, ( "Unexpected value for MAX_ENSEMBL_RELEASE: %d" % MAX_ENSEMBL_RELEASE + ) + def test_int_version(): for version in range(54, MAX_ENSEMBL_RELEASE): EnsemblRelease(version) + def test_str_version(): for version in range(54, MAX_ENSEMBL_RELEASE): EnsemblRelease(str(version)) diff --git a/test/test_search.py b/test/test_search.py index b04688e..40930a4 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -3,6 +3,7 @@ from pyensembl import find_nearest_locus from .common import test_ensembl_releases + @test_ensembl_releases() def test_find_nearest_BRAF_exon(ensembl): braf = ensembl.genes_by_name("BRAF")[0] @@ -11,25 +12,23 @@ def test_find_nearest_BRAF_exon(ensembl): for exon in exons: # immediately before exon result_before = find_nearest_locus( - start=exon.start - 2, - end=exon.start - 1, - loci=exons) + start=exon.start - 2, end=exon.start - 1, loci=exons + ) eq_(result_before, (1, exon)) # overlapping with exon result_overlap = find_nearest_locus( - start=exon.start - 2, - end=exon.start + 1, - loci=exons) + start=exon.start - 2, end=exon.start + 1, loci=exons + ) eq_(result_overlap, (0, exon)) # immediately after exon result_after = find_nearest_locus( - start=exon.end + 1, - end=exon.end + 2, - loci=exons) + start=exon.end + 1, end=exon.end + 2, loci=exons + ) eq_(result_after, (1, exon)) + @test_ensembl_releases() def test_find_nearest_BRAF_transcript(ensembl): braf_transcript = ensembl.genes_by_name("BRAF")[0].transcripts[0] @@ -38,22 +37,19 @@ def test_find_nearest_BRAF_transcript(ensembl): for transcript in transcripts: # immediately before transcript result_before = find_nearest_locus( - start=transcript.start - 2, - end=transcript.start - 1, - loci=transcripts) + start=transcript.start - 2, end=transcript.start - 1, loci=transcripts + ) eq_(result_before, (1, transcript)) # overlapping with transcript result_overlap = find_nearest_locus( - start=transcript.start - 2, - end=transcript.start + 1, - loci=transcripts) + start=transcript.start - 2, end=transcript.start + 1, loci=transcripts + ) eq_(result_overlap, (0, transcript)) # immediately after transcript # may overlap with other transcripts result_after = find_nearest_locus( - start=transcript.end + 1, - end=transcript.end + 2, - loci=transcripts) + start=transcript.end + 1, end=transcript.end + 2, loci=transcripts + ) eq_(result_after, (1, transcript)) diff --git a/test/test_sequence_data.py b/test/test_sequence_data.py index 98fa7c4..1d8b7fd 100644 --- a/test/test_sequence_data.py +++ b/test/test_sequence_data.py @@ -13,16 +13,17 @@ FASTA_PATH = data_path("mouse.ensembl.81.partial.ENSMUSG00000017167.fa") + def test_sequence_type(): with TemporaryDirectory() as tmpdir: - seqs_dna = SequenceData( - [FASTA_PATH], - cache_directory_path=tmpdir) + seqs_dna = SequenceData([FASTA_PATH], cache_directory_path=tmpdir) seq = seqs_dna.get("ENSMUST00000138942") - assert seq is not None, \ - "Failed to find sequence for ENSMUST00000138942" - assert isinstance(seq, str), \ - "Wrong sequence type, expected %s but got %s" % (str, type(seq)) + assert seq is not None, "Failed to find sequence for ENSMUST00000138942" + assert isinstance(seq, str), "Wrong sequence type, expected %s but got %s" % ( + str, + type(seq), + ) + def test_missing_sequence(): with TemporaryDirectory() as tmpdir: @@ -30,24 +31,24 @@ def test_missing_sequence(): seq = seqs.get("NotInFasta") assert seq is None, "Should get None back for missing sequence" + def test_clear_cache(): with TemporaryDirectory() as tmpdir: seqs = 
SequenceData([FASTA_PATH], cache_directory_path=tmpdir) - assert not seqs._fasta_dictionary, \ - "Expected _fasta_dictionary to load lazily" + assert not seqs._fasta_dictionary, "Expected _fasta_dictionary to load lazily" seqs._load_or_create_fasta_dictionary_pickle() - assert len(seqs._fasta_dictionary) > 0, \ - "FASTA dictionary didn't get created" + assert len(seqs._fasta_dictionary) > 0, "FASTA dictionary didn't get created" seqs.clear_cache() - assert not seqs._fasta_dictionary, \ - "Expected FASTA dictionary to be empty after clear_cache()" + assert ( + not seqs._fasta_dictionary + ), "Expected FASTA dictionary to be empty after clear_cache()" for pickle_path in seqs.fasta_dictionary_pickle_paths: - assert not exists(pickle_path), \ - "Cached pickle file should have been deleted" + assert not exists( + pickle_path + ), "Cached pickle file should have been deleted" seqs._load_or_create_fasta_dictionary_pickle() for pickle_path in seqs.fasta_dictionary_pickle_paths: - assert exists(pickle_path), \ - "Cached pickle file should have been created" + assert exists(pickle_path), "Cached pickle file should have been created" diff --git a/test/test_serialization.py b/test/test_serialization.py index d90b6b7..40d2c9f 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -19,7 +19,7 @@ from .data import ( TP53_gene_id, custom_mouse_genome_grcm38_subset, - setup_init_custom_mouse_genome + setup_init_custom_mouse_genome, ) @@ -125,5 +125,6 @@ def test_species_to_pickle(): @test_ensembl_releases() def test_unique_memory_address_of_unpickled_genomes(ensembl_genome): unpickled = pickle.loads(pickle.dumps(ensembl_genome)) - assert ensembl_genome is unpickled, \ - "Expected same object for %s but got two different instances" % (unpickled,) + assert ( + ensembl_genome is unpickled + ), "Expected same object for %s but got two different instances" % (unpickled,) diff --git a/test/test_string_representation.py b/test/test_string_representation.py index e2d390c..80d6e48 100644 --- a/test/test_string_representation.py +++ b/test/test_string_representation.py @@ -11,16 +11,23 @@ def test_Locus_string_representation(): def test_Gene_string_representation(): gene = Gene( - gene_id="ENSG0001", gene_name="CAPITALISM", - biotype="protein_coding", contig="Y", start=1, end=5, strand="+", - genome=ensembl_grch37) + gene_id="ENSG0001", + gene_name="CAPITALISM", + biotype="protein_coding", + contig="Y", + start=1, + end=5, + strand="+", + genome=ensembl_grch37, + ) string_repr = str(gene) expected = ( "Gene(gene_id='ENSG0001'," " gene_name='CAPITALISM'," " biotype='protein_coding'," " contig='Y'," - " start=1, end=5, strand='+', genome='GRCh37')") + " start=1, end=5, strand='+', genome='GRCh37')" + ) eq_(string_repr, expected) @@ -34,7 +41,8 @@ def test_Transcript_string_representation(): start=1, end=5, strand="+", - genome=ensembl_grch37) + genome=ensembl_grch37, + ) expected = ( "Transcript(transcript_id='ENST0001'," @@ -57,7 +65,8 @@ def test_Exon_string_representation(): contig="Y", start=1, end=5, - strand="+") + strand="+", + ) expected = ( "Exon(exon_id='ENSE0001'," diff --git a/test/test_timings.py b/test/test_timings.py index a948886..b0fd8e1 100644 --- a/test/test_timings.py +++ b/test/test_timings.py @@ -5,17 +5,21 @@ ensembl = genome_for_reference_name("GRCh38") contigs = [str(i + 1) for i in range(22)] + ["X", "Y"] + def make_repeat_lookup_fn(lookup_fn, n_positions): """ Make a thunk which calls the lookup_fn at a number of loci for each human chromosome (excluding MT). 
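A note on the pattern above: because make_repeat_lookup_fn returns a zero-argument thunk, it can be handed directly to any timing utility. A minimal sketch using the standard library's timeit (the 5-position count is arbitrary, and the ensembl/contigs names are the module-level globals defined at the top of this test file):

    import timeit

    # zero-argument callable probing genes_at_locus at 5 positions per contig
    repeat_fn = make_repeat_lookup_fn(ensembl.genes_at_locus, n_positions=5)
    seconds = timeit.timeit(repeat_fn, number=1)
    print("genes_at_locus over %d loci: %0.3fs" % (5 * len(contigs), seconds))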
""" + def repeat_lookup_fn(): for contig in contigs: - for position in [10 ** 6 + i * 10 ** 6 for i in range(n_positions)]: + for position in [10**6 + i * 10**6 for i in range(n_positions)]: lookup_fn(contig, position) + return repeat_lookup_fn + def run_benchmark(lookup_fn, n_positions_per_contig=20, time_limit=60.0): """ Take a lookup functions (such as EnsemblRelease.genes_at_locus) and @@ -24,31 +28,38 @@ def run_benchmark(lookup_fn, n_positions_per_contig=20, time_limit=60.0): repeat_lookup_fn = make_repeat_lookup_fn(lookup_fn, n_positions_per_contig) n_loci = n_positions_per_contig * len(contigs) name = lookup_fn.__name__ - average_time = benchmark( - repeat_lookup_fn, - name="%s for %d loci" % (name, n_loci)) + average_time = benchmark(repeat_lookup_fn, name="%s for %d loci" % (name, n_loci)) print("-- %s : %0.4fs" % (name, average_time)) - assert average_time < time_limit, \ - "%s took too long for %s loci: %0.4fs" % (name, n_loci, average_time) + assert average_time < time_limit, "%s took too long for %s loci: %0.4fs" % ( + name, + n_loci, + average_time, + ) return average_time + def test_timing_genes_at_locus(): run_benchmark(ensembl.genes_at_locus) + def test_timing_transcripts_at_locus(): run_benchmark(ensembl.transcripts_at_locus) + def test_timing_exons_at_locus(): run_benchmark(ensembl.exons_at_locus) + def test_timing_transcript_sequences_at_locus(): def transcript_sequences_at_locus(contig, position): sequences = [] for transcript in ensembl.transcripts_at_locus(contig, position): sequences.append(transcript.sequence) return sequences + run_benchmark(transcript_sequences_at_locus) + def test_timing_transcript_coding_sequences_at_locus(): def transcript_coding_sequences_at_locus(contig, position): sequences = [] @@ -56,8 +67,10 @@ def transcript_coding_sequences_at_locus(contig, position): if transcript.sequence and transcript.complete: sequences.append(transcript.coding_sequence) return sequences + run_benchmark(transcript_coding_sequences_at_locus) + def run_all_benchmarks(): import types @@ -69,5 +82,6 @@ def run_all_benchmarks(): if isinstance(f, types.FunctionType): f() + if __name__ == "__main__": run_all_benchmarks() diff --git a/test/test_transcript_ids.py b/test/test_transcript_ids.py index 7868800..f1e910f 100644 --- a/test/test_transcript_ids.py +++ b/test/test_transcript_ids.py @@ -13,46 +13,51 @@ # subset of transcript IDs for HLA-A HLA_A_TRANSCRIPT_IDS = [ - 'ENST00000396634', - 'ENST00000376809', - 'ENST00000376806', - 'ENST00000376802', - 'ENST00000496081', - 'ENST00000495183', - 'ENST00000461903', - 'ENST00000479320', + "ENST00000396634", + "ENST00000376809", + "ENST00000376806", + "ENST00000376802", + "ENST00000496081", + "ENST00000495183", + "ENST00000461903", + "ENST00000479320", ] + def test_transcript_ids_ensembl_grch38_hla_a(): # chr6:29,945,884 is a position for HLA-A # based on: # http://useast.ensembl.org/Homo_sapiens/Gene/ # Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884 - transcript_ids = grch38.transcript_ids_at_locus( - 6, 29941260, 29945884) + transcript_ids = grch38.transcript_ids_at_locus(6, 29941260, 29945884) for transcript_id in HLA_A_TRANSCRIPT_IDS: - assert transcript_id in transcript_ids, \ + assert transcript_id in transcript_ids, ( "Transcript %s of HLA-A not found overlapping locus" % transcript_id + ) + KNOWN_TRANSCRIPT_IDS = HLA_A_TRANSCRIPT_IDS + [ - 'ENST00000398417', # transcript ID of SMAD4-001 - 'ENST00000334701', # transcript ID of HSP90AA1-001 - 'ENST00000599837', # transcript ID of CTAG1A-002 + "ENST00000398417", 
# transcript ID of SMAD4-001 + "ENST00000334701", # transcript ID of HSP90AA1-001 + "ENST00000599837", # transcript ID of CTAG1A-002 ] + # TODO: add release 54 after transcript IDs for older GTFs are filled in # See https://github.com/hammerlab/pyensembl/issues/20 @test_ensembl_releases(75, grch38.release) def test_all_transcript_ids(ensembl): transcript_ids = set(ensembl.transcript_ids()) for transcript_id in KNOWN_TRANSCRIPT_IDS: - assert transcript_id in transcript_ids, \ - "Missing transcript ID %s from %s" % (transcript_id, ensembl) + assert transcript_id in transcript_ids, "Missing transcript ID %s from %s" % ( + transcript_id, + ensembl, + ) + def test_transcript_id_of_protein_id_CCR2(): # Looked up on Oct 9 2021: # CCR2-203 ENST00000445132.3 maps to ENSP00000399285.2 # Ensembl release 104, GRCh38.p13 - transcript_id = grch38.transcript_id_of_protein_id( - "ENSP00000399285") + transcript_id = grch38.transcript_id_of_protein_id("ENSP00000399285") eq_("ENST00000445132", transcript_id) diff --git a/test/test_transcript_objects.py b/test/test_transcript_objects.py index 79d08d5..b8d5d58 100644 --- a/test/test_transcript_objects.py +++ b/test/test_transcript_objects.py @@ -23,27 +23,35 @@ def test_transcript_start_codon(): test_transcript_start_codon : Check that fields Transcript (for transcript named CTNNBIP1-004) matches known values. """ - CTNNBIP1_004_transcript = ensembl77.transcript_by_id( - CTNNBIP1_004_transcript_id) + CTNNBIP1_004_transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) - assert Locus.__eq__(CTNNBIP1_004_transcript, CTNNBIP1_004_locus), \ - "Expected locus %s but got %s" % ( - CTNNBIP1_004_locus, Locus.__str__(CTNNBIP1_004_transcript)) + assert Locus.__eq__( + CTNNBIP1_004_transcript, CTNNBIP1_004_locus + ), "Expected locus %s but got %s" % ( + CTNNBIP1_004_locus, + Locus.__str__(CTNNBIP1_004_transcript), + ) start_offsets = CTNNBIP1_004_transcript.start_codon_spliced_offsets - assert len(start_offsets) == 3, \ - "Wrong length for start codon: %d (%s)" % ( - len(start_offsets), start_offsets) + assert len(start_offsets) == 3, "Wrong length for start codon: %d (%s)" % ( + len(start_offsets), + start_offsets, + ) - assert all(isinstance(i, int) for i in start_offsets), \ - "Wrong type %s for beginning start codon offset" % ( - [type(i) for i in start_offsets],) + assert all( + isinstance(i, int) for i in start_offsets + ), "Wrong type %s for beginning start codon offset" % ( + [type(i) for i in start_offsets], + ) expected_start_codon_offset = len(CTNNBIP1_004_UTR5) start_codon_offset = min(start_offsets) - assert start_codon_offset == expected_start_codon_offset, \ - "Incorrect start codon offset, expected %d but got %d" % ( - expected_start_codon_offset, start_codon_offset) + assert ( + start_codon_offset == expected_start_codon_offset + ), "Incorrect start codon offset, expected %d but got %d" % ( + expected_start_codon_offset, + start_codon_offset, + ) def test_transcript_exons(): @@ -53,24 +61,37 @@ def test_transcript_exons(): """ transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) exons = transcript.exons - assert isinstance(exons, list), \ - "Expected list of Exon objects, got %s : %s" % (exons, type(exons)) + assert isinstance(exons, list), "Expected list of Exon objects, got %s : %s" % ( + exons, + type(exons), + ) # CTTNBIP1-004 has 5 exons - assert len(exons) == len(CTTNNIP1_004_exon_lengths), \ - "Expected %d exons but got %d" % ( - len(CTTNNIP1_004_exon_lengths), len(exons)) + assert len(exons) == len( + 
CTTNNIP1_004_exon_lengths + ), "Expected %d exons but got %d" % (len(CTTNNIP1_004_exon_lengths), len(exons)) for i, exon in enumerate(exons): expected_id = CTTNNIP1_004_exon_ids[i] - assert exon.id == expected_id, \ - "Expected exon #%d of %s to have ID %s but got %s" % ( - i + 1, transcript, expected_id, exon.id) + assert ( + exon.id == expected_id + ), "Expected exon #%d of %s to have ID %s but got %s" % ( + i + 1, + transcript, + expected_id, + exon.id, + ) expected_length = CTTNNIP1_004_exon_lengths[i] - assert len(exon) == expected_length, \ - "Expected exon #%d of %s (%s) to have length %d but got %d" % ( - i + 1, transcript, exon, expected_length, len(exon)) + assert ( + len(exon) == expected_length + ), "Expected exon #%d of %s (%s) to have length %d but got %d" % ( + i + 1, + transcript, + exon, + expected_length, + len(exon), + ) # not testing NCBI/Release 54 since I just discovered that ensembl54 @@ -106,39 +127,41 @@ def test_sequence_parts(genome): eq_( combined_sequence_length, len(transcript), - "Length 5' UTR(%dnt) + CDS(%dnt) + 3' UTR(%d) = %d, expected %d" % ( - len(utr5), - len(cds), - len(utr3), - combined_sequence_length, - len(transcript))) + "Length 5' UTR(%dnt) + CDS(%dnt) + 3' UTR(%d) = %d, expected %d" + % (len(utr5), len(cds), len(utr3), combined_sequence_length, len(transcript)), + ) eq_( combined_string, full_sequence, - "Expected FOXP3-001 sequence:\n%s\n\n5' UTR + CDS + 3' UTR:\n%s" % ( - full_sequence, - combined_string)) + "Expected FOXP3-001 sequence:\n%s\n\n5' UTR + CDS + 3' UTR:\n%s" + % (full_sequence, combined_string), + ) + def test_transcript_utr5_sequence_CTNNIP1_004(): transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) utr5 = transcript.five_prime_utr_sequence expected_utr5_length = len(CTNNBIP1_004_UTR5) - eq_(len(utr5), + eq_( + len(utr5), expected_utr5_length, - "Expected 5' UTR length %d, got %d" % ( - expected_utr5_length, len(utr5))) + "Expected 5' UTR length %d, got %d" % (expected_utr5_length, len(utr5)), + ) eq_(utr5, CTNNBIP1_004_UTR5) + def test_transcript_utr3_sequence_CTNNIP1_004(): transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) utr3 = transcript.three_prime_utr_sequence expected_utr3_length = len(CTNNBIP1_004_UTR3) - eq_(len(utr3), + eq_( + len(utr3), expected_utr3_length, - "Expected 3' UTR length %d, got %d" % ( - expected_utr3_length, len(utr3))) + "Expected 3' UTR length %d, got %d" % (expected_utr3_length, len(utr3)), + ) eq_(utr3, CTNNBIP1_004_UTR3) + def test_transcript_cds_CTNNIP1_004(): transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) cds = transcript.coding_sequence @@ -146,9 +169,11 @@ def test_transcript_cds_CTNNIP1_004(): eq_( len(cds), expected_cds_length, - "Expected CDS length %d, got %d" % (expected_cds_length, len(cds))) + "Expected CDS length %d, got %d" % (expected_cds_length, len(cds)), + ) eq_(cds, CTNNBIP1_004_CDS) + @test_ensembl_releases() def test_equal_transcripts(genome): t1 = genome.genes_by_name("TP53")[0].transcripts[0] @@ -157,29 +182,37 @@ def test_equal_transcripts(genome): eq_(t1, t2) eq_(hash(t1), hash(t2)) + @test_ensembl_releases() def test_not_equal_transcripts(genome): t1 = genome.genes_by_name("MUC1")[0].transcripts[0] t2 = genome.genes_by_name("BRCA1")[0].transcripts[0] assert_not_equal(t1, t2) + def test_protein_id(): transcript = ensembl77.transcripts_by_name("EGFR-001")[0] eq_(transcript.protein_id, "ENSP00000275493") + def test_protein_protein_sequence(): transcript = ensembl77.transcripts_by_name("EGFR-001")[0] 
eq_(transcript.protein_sequence, EGFR_001_protein_sequence) + def test_transcript_gene_should_match_parent_gene(): gene = ensembl77.gene_by_id(TP53_gene_id) for transcript in gene.transcripts: eq_(transcript.gene, gene) + @test_ensembl_releases() def test_BRCA1_201_has_protein_coding_biotype(genome): transcript = genome.transcripts_by_name("BRCA1-201")[0] - assert transcript.is_protein_coding, \ - "Expected BRCA1-201 transcript %s to have a protein coding biotype in %s" % ( - transcript, genome) + assert ( + transcript.is_protein_coding + ), "Expected BRCA1-201 transcript %s to have a protein coding biotype in %s" % ( + transcript, + genome, + ) eq_(transcript.biotype, "protein_coding") diff --git a/test/test_transcript_sequences.py b/test/test_transcript_sequences.py index 529e599..f654a3a 100644 --- a/test/test_transcript_sequences.py +++ b/test/test_transcript_sequences.py @@ -9,6 +9,7 @@ grch38 = genome_for_reference_name("GRCh38") + def test_transcript_sequence_ensembl_grch38(): # extremely short TRD gene seq = grch38.transcript_sequence("ENST00000448914") diff --git a/test/test_transcript_support_level.py b/test/test_transcript_support_level.py index 0d9cabd..6bf8a40 100644 --- a/test/test_transcript_support_level.py +++ b/test/test_transcript_support_level.py @@ -8,11 +8,12 @@ from pyensembl import cached_release + def test_transcript_support_level(): - """ The Transcript Support Level (TSL) is a method to highlight the well-supported and poorly-supported transcript - models for users, based on the type and quality of the alignments used to annotate the transcript. - In the Ensembl database, it can be assigned to a value 1 through 5, or reported as NA, or missing, or missing - completely in older releases. We translate it to an integer value, otherwise to None. + """The Transcript Support Level (TSL) is a method to highlight the well-supported and poorly-supported transcript + models for users, based on the type and quality of the alignments used to annotate the transcript. + In the Ensembl database, it can be assigned to a value 1 through 5, or reported as NA, or missing, or missing + completely in older releases. We translate it to an integer value, otherwise to None. 
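Since this docstring is the only place the TSL mapping is spelled out, a short self-contained sketch of the intended normalization may help. The helper name normalize_tsl is illustrative only (not part of the patch); its parsing mirrors the transcript_by_id change made to genome.py earlier in this series:

    def normalize_tsl(raw):
        # Ensembl GTFs report values such as "1", "NA", or
        # "1 (assigned to previous version 5)"; older releases omit the field
        if raw is None:
            return None
        first_token = raw.split(" ")[0]
        return int(first_token) if first_token.isnumeric() else None

    assert normalize_tsl("1") == 1
    assert normalize_tsl("5 (assigned to previous version 4)") == 5
    assert normalize_tsl("NA") is None
    assert normalize_tsl(None) is None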
""" ensembl93 = cached_release(93) transcript = ensembl93.transcripts_by_name("DDX11L1-202")[0] diff --git a/test/test_ucsc_gtf.py b/test/test_ucsc_gtf.py index 3e4a9e5..7cecde5 100644 --- a/test/test_ucsc_gtf.py +++ b/test/test_ucsc_gtf.py @@ -8,16 +8,14 @@ UCSC_GENCODE_PATH = data_path("gencode.ucsc.small.gtf") UCSC_REFSEQ_PATH = data_path("refseq.ucsc.small.gtf") + def test_ucsc_gencode_gtf(): with TemporaryDirectory() as tmpdir: - db = Database( - UCSC_GENCODE_PATH, - cache_directory_path=tmpdir) + db = Database(UCSC_GENCODE_PATH, cache_directory_path=tmpdir) df = db._load_gtf_as_dataframe() exons = df[df["feature"] == "exon"] # expect 12 exons from the dataframe - assert len(exons) == 12, \ - "Expected 12 exons, got %d: %s" % (len(exons), exons) + assert len(exons) == 12, "Expected 12 exons, got %d: %s" % (len(exons), exons) def test_ucsc_gencode_genome(): @@ -30,22 +28,22 @@ def test_ucsc_gencode_genome(): reference_name="GRCh38", annotation_name="ucsc_test", gtf_path_or_url=UCSC_GENCODE_PATH, - cache_directory_path=tmpdir) + cache_directory_path=tmpdir, + ) genome.index() genes = genome.genes() for gene in genes: - assert gene.id, \ - "Gene with missing ID in %s" % (genome.gtf.dataframe(),) - assert len(genes) == 7, \ - "Expected 7 genes, got %d: %s" % ( - len(genes), genes) + assert gene.id, "Gene with missing ID in %s" % (genome.gtf.dataframe(),) + assert len(genes) == 7, "Expected 7 genes, got %d: %s" % (len(genes), genes) transcripts = genome.transcripts() for transcript in transcripts: - assert transcript.id, \ - "Transcript with missing ID in %s" % (genome.gtf.dataframe(),) - assert len(transcripts) == 7, \ - "Expected 7 transcripts, got %d: %s" % ( - len(transcripts), transcripts) + assert transcript.id, "Transcript with missing ID in %s" % ( + genome.gtf.dataframe(), + ) + assert len(transcripts) == 7, "Expected 7 transcripts, got %d: %s" % ( + len(transcripts), + transcripts, + ) gene_uc001aak4 = genome.gene_by_id("uc001aak.4") eq_(gene_uc001aak4.id, "uc001aak.4") @@ -58,21 +56,18 @@ def test_ucsc_gencode_genome(): transcript_1_30564 = genome.transcripts_at_locus("chr1", 30564) eq_(transcript_1_30564[0].id, "uc057aty.1") + def test_ucsc_refseq_gtf(): """ Test GTF object with a small RefSeq GTF file downloaded from http://genome.ucsc.edu/cgi-bin/hgTables """ with TemporaryDirectory() as tmpdir: - db = Database( - UCSC_REFSEQ_PATH, - cache_directory_path=tmpdir) + db = Database(UCSC_REFSEQ_PATH, cache_directory_path=tmpdir) df = db._load_gtf_as_dataframe() exons = df[df["feature"] == "exon"] # expect 16 exons from the GTF - assert len(exons) == 16, \ - "Expected 16 exons, got %d: %s" % ( - len(exons), exons) + assert len(exons) == 16, "Expected 16 exons, got %d: %s" % (len(exons), exons) def test_ucsc_refseq_genome(): @@ -85,25 +80,30 @@ def test_ucsc_refseq_genome(): reference_name="GRCh38", annotation_name="ucsc_test", gtf_path_or_url=UCSC_REFSEQ_PATH, - cache_directory_path=tmpdir) + cache_directory_path=tmpdir, + ) genome.index() genes = genome.genes() for gene in genes: - assert gene.id, \ - "Gene with missing ID in %s" % (genome.db._load_gtf_as_dataframe(),) - assert len(genes) == 2, \ - "Expected 2 genes, got %d: %s" % ( - len(genes), genes) + assert gene.id, "Gene with missing ID in %s" % ( + genome.db._load_gtf_as_dataframe(), + ) + assert len(genes) == 2, "Expected 2 genes, got %d: %s" % (len(genes), genes) transcripts = genome.transcripts() for transcript in transcripts: - assert transcript.id, \ - "Transcript with missing ID in %s" % 
(genome.db._load_gtf_as_dataframe(),) - assert len(transcripts) == 2, \ - "Expected 2 transcripts, got %d: %s" % ( - len(transcripts), transcripts) + assert transcript.id, "Transcript with missing ID in %s" % ( + genome.db._load_gtf_as_dataframe(), + ) + assert len(transcripts) == 2, "Expected 2 transcripts, got %d: %s" % ( + len(transcripts), + transcripts, + ) genes_at_locus = genome.genes_at_locus("chr1", 67092176) - assert len(genes_at_locus) == 2, \ - "Expected 2 genes at locus chr1:67092176, got %d: %s" % ( - len(genes_at_locus), genes_at_locus) + assert ( + len(genes_at_locus) == 2 + ), "Expected 2 genes at locus chr1:67092176, got %d: %s" % ( + len(genes_at_locus), + genes_at_locus, + ) ids = set([gene.id for gene in genes_at_locus]) eq_(set(["NM_001276352", "NR_075077"]), ids) From 7c581a879c46a9a503876ee5e3f442766f04ae30 Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Fri, 29 Dec 2023 12:16:59 -0600 Subject: [PATCH 09/35] format and release --- pyensembl/species.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyensembl/species.py b/pyensembl/species.py index c19f359..7af4569 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -335,8 +335,8 @@ def check_species_object(species_name_or_object): # "WS180": (47, 49), # "WS190": (50, 54), "WS200": (55, 57), - "WS210": (58, 78), - "WS220": (79, 66), + "WS210": (58, 60), + "WS220": (61, 66), "WBcel235": (67, MAX_ENSEMBL_RELEASE), }, ) From 3819ea2e5544897488e205fe5a9f8b7a09ff34c6 Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Fri, 29 Dec 2023 12:56:01 -0600 Subject: [PATCH 10/35] add comand to list all available --- pyensembl/shell.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/pyensembl/shell.py b/pyensembl/shell.py index cd7ab3c..2cc56ee 100755 --- a/pyensembl/shell.py +++ b/pyensembl/shell.py @@ -40,14 +40,17 @@ import argparse import logging.config -import pkg_resources import os -from .ensembl_release import EnsemblRelease, MAX_ENSEMBL_RELEASE +import pkg_resources + +from .ensembl_release import MAX_ENSEMBL_RELEASE, EnsemblRelease from .genome import Genome from .species import Species -logging.config.fileConfig(pkg_resources.resource_filename(__name__, "logging.conf")) +logging.config.fileConfig( + pkg_resources.resource_filename(__name__, "logging.conf") +) logger = logging.getLogger(__name__) @@ -94,7 +97,9 @@ ) path_group.add_argument( - "--annotation-name", default=None, help="Name of annotation source (e.g. refseq)" + "--annotation-name", + default=None, + help="Name of annotation source (e.g. 
refseq)", ) path_group.add_argument( @@ -140,6 +145,7 @@ "delete-all-files", "delete-index-files", "list", + "available", ), help=( '"install" will download and index any data that is not ' @@ -151,6 +157,20 @@ ) +def collect_all_available_ensembl_releases(): + for species_name in Species.all_registered_latin_names(): + species = Species._latin_names_to_species[species_name] + # print in tree format + print( + "* " + species_name + " (" + ",".join(species.synonyms) + ")" + ":" + ) + for ( + release_name, + release_range, + ) in species.reference_assemblies.items(): + print(" * " + release_name + ":", release_range) + + def collect_all_installed_ensembl_releases(): genomes = [] for species, release in Species.all_species_release_pairs(): @@ -182,11 +202,13 @@ def all_combinations_of_ensembl_genomes(args): # URL to be a directory with all the same filenames as # would be provided by Ensembl gtf_url = os.path.join( - args.custom_mirror, os.path.basename(ensembl_release.gtf_url) + args.custom_mirror, + os.path.basename(ensembl_release.gtf_url), ) transcript_fasta_urls = [ os.path.join( - args.custom_mirror, os.path.basename(transcript_fasta_url) + args.custom_mirror, + os.path.basename(transcript_fasta_url), ) for transcript_fasta_url in ensembl_release.transcript_fasta_urls ] @@ -244,7 +266,9 @@ def collect_selected_genomes(args): def run(): args = parser.parse_args() - if args.action == "list": + if args.action == "available": + collect_all_available_ensembl_releases() + elif args.action == "list": # TODO: how do we also identify which non-Ensembl genomes are # installed? genomes = collect_all_installed_ensembl_releases() From ed1de05908cc7d0b0c2834f0852142627a0ce3c2 Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Fri, 29 Dec 2023 13:01:55 -0600 Subject: [PATCH 11/35] add comand to list all available --- pyensembl/shell.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyensembl/shell.py b/pyensembl/shell.py index 2cc56ee..66f874c 100755 --- a/pyensembl/shell.py +++ b/pyensembl/shell.py @@ -162,7 +162,12 @@ def collect_all_available_ensembl_releases(): species = Species._latin_names_to_species[species_name] # print in tree format print( - "* " + species_name + " (" + ",".join(species.synonyms) + ")" + ":" + "* " + + species_name + + " (" + + ", ".join(species.synonyms) + + ")" + + ":" ) for ( release_name, From 1426100e47478308c805556f6000db17f955033d Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Fri, 29 Dec 2023 13:03:44 -0600 Subject: [PATCH 12/35] add comand to list all available --- pyensembl/species.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pyensembl/species.py b/pyensembl/species.py index 7af4569..c153f5e 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -100,7 +100,8 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}): for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" % i + "Ensembl release %d already has an associated genome" + % i ) self._release_to_genome[i] = genome_name @@ -113,10 +114,13 @@ def which_reference(self, ensembl_release): return self._release_to_genome[ensembl_release] def __str__(self): - return "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" % ( - self.latin_name, - self.synonyms, - self.reference_assemblies, + return ( + "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" + % ( + self.latin_name, + self.synonyms, + self.reference_assemblies, + ) 
) def __eq__(self, other): @@ -188,9 +192,9 @@ def check_species_object(species_name_or_object): latin_name="homo_sapiens", synonyms=["human"], reference_assemblies={ - "GRCh38": (76, MAX_ENSEMBL_RELEASE), - "GRCh37": (55, 75), "NCBI36": (54, 54), + "GRCh37": (55, 75), + "GRCh38": (76, MAX_ENSEMBL_RELEASE), }, ) From e03a213ccedda997ecb868490e62367f779f8c9a Mon Sep 17 00:00:00 2001 From: Ye Chang Date: Tue, 2 Jan 2024 04:13:56 -0600 Subject: [PATCH 13/35] fix bug --- pyensembl/genome.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyensembl/genome.py b/pyensembl/genome.py index 5345742..e56dd2e 100644 --- a/pyensembl/genome.py +++ b/pyensembl/genome.py @@ -896,8 +896,10 @@ def transcript_by_id(self, transcript_id): extra_data = dict(zip(extra_field_names, result[5:])) transcript_name = extra_data.get("transcript_name") transcript_biotype = extra_data.get("transcript_biotype") - tsl = extra_data.get("transcript_support_level") - if not tsl or tsl == "NA": + tsl = extra_data.get("transcript_support_level", "NA") + if tsl: + tsl = tsl.split(" ")[0] + if not tsl or tsl == "NA" or not tsl.isnumeric(): tsl = None else: tsl = int(tsl) From c6915d1c33879973c7a7539037b9941cbb4950d1 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 19:27:22 -0600 Subject: [PATCH 14/35] support plants --- pyensembl/download_cache.py | 50 ++++++---- pyensembl/ensembl_release.py | 49 +++++++--- pyensembl/ensembl_release_versions.py | 2 + pyensembl/ensembl_url_templates.py | 136 ++++++++++++++++++-------- pyensembl/shell.py | 37 ++++--- pyensembl/species.py | 45 ++++++--- 6 files changed, 221 insertions(+), 98 deletions(-) diff --git a/pyensembl/download_cache.py b/pyensembl/download_cache.py index c33d6fe..48ebd00 100644 --- a/pyensembl/download_cache.py +++ b/pyensembl/download_cache.py @@ -11,14 +11,13 @@ # limitations under the License. +import logging from os import listdir, remove -from os.path import join, exists, split, abspath, isdir +from os.path import abspath, exists, isdir, join, split from shutil import copy2, rmtree -import logging import datacache - logger = logging.getLogger(__name__) CACHE_BASE_SUBDIR = "pyensembl" @@ -29,9 +28,11 @@ def cache_subdirectory( reference_name=None, annotation_name=None, annotation_version=None ): """ - Which cache subdirectory to use for a given annotation database - over a particular reference. All arguments can be omitted to just get - the base subdirectory for all pyensembl cached datasets. + Which cache subdirectory to use for a given annotation database over a + particular reference. + + All arguments can be omitted to just get the base subdirectory for + all pyensembl cached datasets. """ if reference_name is None: reference_name = "" @@ -135,7 +136,7 @@ def cache_directory_path(self): def _fields(self): """ - Fields used for hashing, string representation, equality comparison + Fields used for hashing, string representation, equality comparison. 
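The "available" subcommand added to shell.py a few hunks above simply walks the species registry. The same listing can be produced programmatically, which is convenient when checking the output format; this sketch is illustrative and relies only on attributes shown in this series:

    from pyensembl.species import Species

    def print_available_genomes():
        # mirrors the tree-style output of `pyensembl available`
        for latin_name in Species.all_registered_latin_names():
            species = Species._latin_names_to_species[latin_name]
            print("* %s (%s):" % (latin_name, ", ".join(species.synonyms)))
            for assembly, release_range in species.reference_assemblies.items():
                print("  * %s: %s" % (assembly, release_range))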
""" return ( ( @@ -150,7 +151,10 @@ def _fields(self): ) def __eq__(self, other): - return other.__class__ is DownloadCache and self._fields() == other._fields() + return ( + other.__class__ is DownloadCache + and self._fields() == other._fields() + ) def __hash__(self): return hash(self._fields()) @@ -202,7 +206,9 @@ def cached_path(self, path_or_url): # for stripping decompression extensions for both local # and remote files local_filename = datacache.build_local_filename( - download_url=path_or_url, filename=remote_filename, decompress=False + download_url=path_or_url, + filename=remote_filename, + decompress=False, ) else: local_filename = remote_filename @@ -210,10 +216,14 @@ def cached_path(self, path_or_url): # if we expect the download function to decompress this file then # we should use its name without the compression extension if self.decompress_on_download: - local_filename = self._remove_compression_suffix_if_present(local_filename) + local_filename = self._remove_compression_suffix_if_present( + local_filename + ) if len(local_filename) == 0: - raise ValueError("Can't determine local filename for %s" % (path_or_url,)) + raise ValueError( + "Can't determine local filename for %s" % (path_or_url,) + ) return join(self.cache_directory_path, local_filename) @@ -254,8 +264,8 @@ def download_or_copy_if_necessary( self, path_or_url, download_if_missing=False, overwrite=False ): """ - Download a remote file or copy - Get the local path to a possibly remote file. + Download a remote file or copy Get the local path to a possibly remote + file. Download if file is missing from the cache directory and `download_if_missing` is True. Download even if local file exists if @@ -295,7 +305,11 @@ def _raise_missing_file_error(self, missing_urls_dict): raise ValueError(error_message) def local_path_or_install_error( - self, field_name, path_or_url, download_if_missing=False, overwrite=False + self, + field_name, + path_or_url, + download_if_missing=False, + overwrite=False, ): try: return self.download_or_copy_if_necessary( @@ -308,13 +322,13 @@ def local_path_or_install_error( def delete_cached_files(self, prefixes=[], suffixes=[]): """ - Deletes any cached files matching the prefixes or suffixes given + Deletes any cached files matching the prefixes or suffixes given. """ if isdir(self.cache_directory_path): for filename in listdir(): - delete = any([filename.endswith(ext) for ext in suffixes]) or any( - [filename.startswith(pre) for pre in prefixes] - ) + delete = any( + [filename.endswith(ext) for ext in suffixes] + ) or any([filename.startswith(pre) for pre in prefixes]) if delete: path = join(self.cache_directory_path, filename) logger.info("Deleting %s", path) diff --git a/pyensembl/ensembl_release.py b/pyensembl/ensembl_release.py index 8ad47ab..8af2584 100644 --- a/pyensembl/ensembl_release.py +++ b/pyensembl/ensembl_release.py @@ -11,22 +11,25 @@ # limitations under the License. """ -Contains the EnsemblRelease class, which extends the Genome class -to be specific to (a particular release of) Ensembl. +Contains the EnsemblRelease class, which extends the Genome class to be +specific to (a particular release of) Ensembl. 
""" from weakref import WeakValueDictionary +from .ensembl_release_versions import MAX_ENSEMBL_RELEASE, check_release_number +from .ensembl_url_templates import ( + ENSEMBL_FTP_SERVER, + make_fasta_url, + make_gtf_url, +) from .genome import Genome -from .ensembl_release_versions import check_release_number, MAX_ENSEMBL_RELEASE from .species import check_species_object, human -from .ensembl_url_templates import ENSEMBL_FTP_SERVER, make_gtf_url, make_fasta_url - class EnsemblRelease(Genome): """ - Bundles together the genomic annotation and sequence data associated with - a particular release of the Ensembl database. + Bundles together the genomic annotation and sequence data associated with a + particular release of the Ensembl database. """ @classmethod @@ -47,7 +50,11 @@ def normalize_init_values(cls, release, species, server): @classmethod def cached( - cls, release=MAX_ENSEMBL_RELEASE, species=human, server=ENSEMBL_FTP_SERVER + cls, + release=MAX_ENSEMBL_RELEASE, + species=human, + server=None, + # server=ENSEMBL_FTP_SERVER, ): """ Construct EnsemblRelease if it's never been made before, otherwise @@ -61,14 +68,21 @@ def cached( return genome def __init__( - self, release=MAX_ENSEMBL_RELEASE, species=human, server=ENSEMBL_FTP_SERVER + self, + release=MAX_ENSEMBL_RELEASE, + species=human, + server=None, + # ENSEMBL_FTP_SERVER, ): self.release, self.species, self.server = self.normalize_init_values( release=release, species=species, server=server ) self.gtf_url = make_gtf_url( - ensembl_release=self.release, species=self.species, server=self.server + ensembl_release=self.release, + species=self.species.latin_name, + server=self.server, + database=self.species.database, ) self.transcript_fasta_urls = [ @@ -77,12 +91,14 @@ def __init__( species=self.species.latin_name, sequence_type="cdna", server=server, + database=self.species.database, ), make_fasta_url( ensembl_release=self.release, species=self.species.latin_name, sequence_type="ncrna", server=server, + database=self.species.database, ), ] @@ -92,6 +108,7 @@ def __init__( species=self.species.latin_name, sequence_type="pep", server=self.server, + database=self.species.database, ) ] @@ -130,7 +147,11 @@ def __hash__(self): return hash((self.release, self.species)) def to_dict(self): - return {"release": self.release, "species": self.species, "server": self.server} + return { + "release": self.release, + "species": self.species, + "server": self.server, + } @classmethod def from_dict(cls, state_dict): @@ -144,7 +165,9 @@ def cached_release(release, species="human"): """ Create an EnsemblRelease instance only if it's hasn't already been made, otherwise returns the old instance. - Keeping this function for backwards compatibility but this functionality - has been moving into the cached method of EnsemblRelease. + + Keeping this function for backwards compatibility but this + functionality has been moving into the cached method of + EnsemblRelease. 
""" return EnsemblRelease.cached(release=release, species=species) diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py index 79649bd..020c6e6 100644 --- a/pyensembl/ensembl_release_versions.py +++ b/pyensembl/ensembl_release_versions.py @@ -12,6 +12,8 @@ MIN_ENSEMBL_RELEASE = 54 MAX_ENSEMBL_RELEASE = 110 +MIN_ENSEMBLGENOME_RELEASE = 50 +MAX_ENSEMBLGENOME_RELEASE = 57 def check_release_number(release): diff --git a/pyensembl/ensembl_url_templates.py b/pyensembl/ensembl_url_templates.py index ded3570..298e517 100644 --- a/pyensembl/ensembl_url_templates.py +++ b/pyensembl/ensembl_url_templates.py @@ -11,19 +11,23 @@ # limitations under the License. """ -Templates for URLs and paths to specific relase, species, and file type -on the Ensembl ftp server. +Templates for URLs and paths to specific relase, species, and file type on the +Ensembl ftp server. For example, the human chromosomal DNA sequences for release 78 are in: https://ftp.ensembl.org/pub/release-78/fasta/homo_sapiens/dna/ +For plant, fungi and metazoa species, the url is as follow: + + https://ftp.ensemblgenomes.ebi.ac.uk/pub/release-57/plants/fasta/glycine_max/cdna/ """ -from .species import Species, find_species_by_name from .ensembl_release_versions import check_release_number +from .species import Species, find_species_by_name ENSEMBL_FTP_SERVER = "https://ftp.ensembl.org" +ENSEMBLGENOME_FTP_SERVER = "https://ftp.ensemblgenomes.ebi.ac.uk" # Example directories # FASTA files: /pub/release-78/fasta/homo_sapiens/ @@ -31,6 +35,39 @@ FASTA_SUBDIR_TEMPLATE = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/" GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/gtf/%(species)s/" +DATABASE_FASTA_SUBDIR_TEMPLATE = ( + "/pub/release-%(release)d/$(database)s/fasta/%(species)s/%(type)s/" +) +DATABASE_GTF_SUBDIR_TEMPLATE = ( + "/pub/release-%(release)d/%(database)s/gtf/%(species)s/" +) + +# GTF annotation file example: Homo_sapiens.GTCh38.gtf.gz +GTF_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(release)d.gtf.gz" + +# cDNA & protein FASTA file for releases before (and including) Ensembl 75 +# example: Homo_sapiens.NCBI36.54.cdna.all.fa.gz +OLD_FASTA_FILENAME_TEMPLATE = ( + "%(Species)s.%(reference)s.%(release)d.%(sequence_type)s.all.fa.gz" +) + +# ncRNA FASTA file for releases before (and including) Ensembl 75 +# example: Homo_sapiens.NCBI36.54.ncrna.fa.gz + +OLD_FASTA_FILENAME_TEMPLATE_NCRNA = ( + "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" +) + +# cDNA & protein FASTA file for releases after Ensembl 75 +# example: Homo_sapiens.GRCh37.cdna.all.fa.gz +NEW_FASTA_FILENAME_TEMPLATE = ( + "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" +) + +# ncRNA FASTA file for releases after Ensembl 75 +# example: Homo_sapiens.GRCh37.ncrna.fa.gz +NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz" + def normalize_release_properties(ensembl_release, species): """ @@ -44,14 +81,10 @@ def normalize_release_properties(ensembl_release, species): return ensembl_release, species.latin_name, reference_name -# GTF annotation file example: Homo_sapiens.GTCh38.gtf.gz -GTF_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(release)d.gtf.gz" - - def make_gtf_filename(ensembl_release, species): """ Return GTF filename expect on Ensembl FTP server for a specific - species/release combination + species/release combination. 
""" ensembl_release, species, reference_name = normalize_release_properties( ensembl_release, species @@ -63,36 +96,36 @@ def make_gtf_filename(ensembl_release, species): } -def make_gtf_url(ensembl_release, species, server=ENSEMBL_FTP_SERVER): +def make_gtf_url(ensembl_release, species, server=None, database=None): """ Returns a URL and a filename, which can be joined together. """ - ensembl_release, species, _ = normalize_release_properties(ensembl_release, species) - subdir = GTF_SUBDIR_TEMPLATE % {"release": ensembl_release, "species": species} - filename = make_gtf_filename(ensembl_release=ensembl_release, species=species) + if server is None: + if database is None: + server = ENSEMBL_FTP_SERVER + else: + server = ENSEMBLGENOME_FTP_SERVER + ensembl_release, species, _ = normalize_release_properties( + ensembl_release, species + ) + if database is None: + subdir = GTF_SUBDIR_TEMPLATE % { + "release": ensembl_release, + "species": species, + } + else: + print(ensembl_release, species, database) + subdir = DATABASE_GTF_SUBDIR_TEMPLATE % { + "release": ensembl_release, + "database": database, + "species": species, + } + filename = make_gtf_filename( + ensembl_release=ensembl_release, species=species + ) return server + subdir + filename -# cDNA & protein FASTA file for releases before (and including) Ensembl 75 -# example: Homo_sapiens.NCBI36.54.cdna.all.fa.gz -OLD_FASTA_FILENAME_TEMPLATE = ( - "%(Species)s.%(reference)s.%(release)d.%(sequence_type)s.all.fa.gz" -) - -# ncRNA FASTA file for releases before (and including) Ensembl 75 -# example: Homo_sapiens.NCBI36.54.ncrna.fa.gz - -OLD_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" - -# cDNA & protein FASTA file for releases after Ensembl 75 -# example: Homo_sapiens.GRCh37.cdna.all.fa.gz -NEW_FASTA_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" - -# ncRNA FASTA file for releases after Ensembl 75 -# example: Homo_sapiens.GRCh37.ncrna.fa.gz -NEW_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.ncrna.fa.gz" - - def make_fasta_filename(ensembl_release, species, sequence_type): ensembl_release, species, reference_name = normalize_release_properties( ensembl_release, species @@ -125,23 +158,46 @@ def make_fasta_filename(ensembl_release, species, sequence_type): } -def make_fasta_url(ensembl_release, species, sequence_type, server=ENSEMBL_FTP_SERVER): - """Construct URL to FASTA file with cDNA transcript or protein sequences +def make_fasta_url( + ensembl_release, + species, + sequence_type, + server=None, + database=None, +): + """ + Construct URL to FASTA file with cDNA transcript or protein sequences. 
Parameter examples: ensembl_release = 75 species = "Homo_sapiens" sequence_type = "cdna" (other option: "pep") """ + if server is None: + if database is None: + server = ENSEMBL_FTP_SERVER + else: + server = ENSEMBLGENOME_FTP_SERVER ensembl_release, species, reference_name = normalize_release_properties( ensembl_release, species ) - subdir = FASTA_SUBDIR_TEMPLATE % { - "release": ensembl_release, - "species": species, - "type": sequence_type, - } + if database is None: + subdir = FASTA_SUBDIR_TEMPLATE % { + "release": ensembl_release, + "species": species, + "type": sequence_type, + } + else: + subdir = DATABASE_FASTA_SUBDIR_TEMPLATE % { + "release": ensembl_release, + "database": database, + "species": species, + "type": sequence_type, + } + filename = make_fasta_filename( - ensembl_release=ensembl_release, species=species, sequence_type=sequence_type + ensembl_release=ensembl_release, + species=species, + sequence_type=sequence_type, ) return server + subdir + filename diff --git a/pyensembl/shell.py b/pyensembl/shell.py index 66f874c..0a7a54f 100755 --- a/pyensembl/shell.py +++ b/pyensembl/shell.py @@ -30,6 +30,9 @@ To list all installed genomes: %(prog)s list +To list all available genomes: + %(prog)s available + To install a genome from source files: %(prog)s install \ --reference-name "GRCh38" \ @@ -46,11 +49,9 @@ from .ensembl_release import MAX_ENSEMBL_RELEASE, EnsemblRelease from .genome import Genome -from .species import Species +from .species import Species, normalize_species_name -logging.config.fileConfig( - pkg_resources.resource_filename(__name__, "logging.conf") -) +logging.config.fileConfig(pkg_resources.resource_filename(__name__, "logging.conf")) logger = logging.getLogger(__name__) @@ -161,14 +162,7 @@ def collect_all_available_ensembl_releases(): for species_name in Species.all_registered_latin_names(): species = Species._latin_names_to_species[species_name] # print in tree format - print( - "* " - + species_name - + " (" - + ", ".join(species.synonyms) - + ")" - + ":" - ) + print("* " + species_name + " (" + ", ".join(species.synonyms) + ")" + ":") for ( release_name, release_range, @@ -189,11 +183,26 @@ def all_combinations_of_ensembl_genomes(args): """ Use all combinations of species and release versions specified by the commandline arguments to return a list of EnsemblRelease or Genome objects. - The results will typically be of type EnsemblRelease unless the + The results will typically be of type EnsemblRelease unless the. + --custom-mirror argument was given. 
""" species_list = args.species if args.species else ["human"] - release_list = args.release if args.release else [MAX_ENSEMBL_RELEASE] + + release_list = ( + args.release + if args.release + else [ + max( + i + for _, i in Species._latin_names_to_species[ + normalize_species_name(species_name) + ].reference_assemblies.values() + ) + for species_name in species_list + ] + ) + genomes = [] for species in species_list: # Otherwise, use Ensembl release information diff --git a/pyensembl/species.py b/pyensembl/species.py index c153f5e..4249234 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -12,7 +12,10 @@ from serializable import Serializable -from .ensembl_release_versions import MAX_ENSEMBL_RELEASE +from .ensembl_release_versions import ( + MAX_ENSEMBL_RELEASE, + MAX_ENSEMBLGENOME_RELEASE, +) # TODO: replace Serializable with data class @@ -30,15 +33,16 @@ class Species(Serializable): _reference_names_to_species = {} @classmethod - def register(cls, latin_name, synonyms, reference_assemblies): + def register(cls, latin_name, synonyms, reference_assemblies, database=None): """ - Create a Species object from the given arguments and enter into - all the dicts used to look the species up by its fields. + Create a Species object from the given arguments and enter into all the + dicts used to look the species up by its fields. """ species = Species( latin_name=latin_name, synonyms=synonyms, reference_assemblies=reference_assemblies, + database=database, ) cls._latin_names_to_species[species.latin_name] = species for synonym in synonyms: @@ -71,8 +75,8 @@ def all_registered_latin_names(cls): @classmethod def all_species_release_pairs(cls): """ - Generator which yields (species, release) pairs - for all possible combinations. + Generator which yields (species, release) pairs for all possible + combinations. 
""" for species_name in cls.all_registered_latin_names(): species = cls._latin_names_to_species[species_name] @@ -80,7 +84,7 @@ def all_species_release_pairs(cls): for release in range(release_range[0], release_range[1] + 1): yield species_name, release - def __init__(self, latin_name, synonyms=[], reference_assemblies={}): + def __init__(self, latin_name, synonyms=[], reference_assemblies={}, database=None): """ Parameters ---------- @@ -95,13 +99,13 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}): self.latin_name = latin_name.lower().replace(" ", "_") self.synonyms = synonyms self.reference_assemblies = reference_assemblies + self.database = database self._release_to_genome = {} for genome_name, (start, end) in self.reference_assemblies.items(): for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" - % i + "Ensembl release %d already has an associated genome" % i ) self._release_to_genome[i] = genome_name @@ -115,11 +119,12 @@ def which_reference(self, ensembl_release): def __str__(self): return ( - "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s)" + "Species(latin_name='%s', synonyms=%s, reference_assemblies=%s, database=%s)" % ( self.latin_name, self.synonyms, self.reference_assemblies, + self.database, ) ) @@ -129,6 +134,7 @@ def __eq__(self, other): and self.latin_name == other.latin_name and self.synonyms == other.synonyms and self.reference_assemblies == other.reference_assemblies + and self.database == other.database ) def to_dict(self): @@ -144,15 +150,17 @@ def __hash__(self): self.latin_name, tuple(self.synonyms), frozenset(self.reference_assemblies.items()), + self.database, ) ) def normalize_species_name(name): """ - If species name was "Homo sapiens" then replace spaces with underscores - and return "homo_sapiens". Also replace common names like "human" with - "homo_sapiens". + If species name was "Homo sapiens" then replace spaces with underscores and + return "homo_sapiens". + + Also replace common names like "human" with "homo_sapiens". """ lower_name = name.lower().strip() @@ -176,6 +184,8 @@ def find_species_by_name(species_name): def check_species_object(species_name_or_object): """ Helper for validating user supplied species names or objects. 
+ + Return `Species` Object """ if isinstance(species_name_or_object, Species): return species_name_or_object @@ -352,3 +362,12 @@ def check_species_object(species_name_or_object): "R64-1-1": (75, MAX_ENSEMBL_RELEASE), }, ) + +rice = Species.register( + latin_name="oryza_sativa", + synonyms=["rice", "japanese_rice"], + reference_assemblies={ + "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), + }, + database="plants", +) From 9b426eb578edbd43cb08f1c529c52091a80ed9bf Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 19:28:34 -0600 Subject: [PATCH 15/35] bump version 2.3.0 --- pyensembl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyensembl/version.py b/pyensembl/version.py index 519574c..55e4709 100644 --- a/pyensembl/version.py +++ b/pyensembl/version.py @@ -1 +1 @@ -__version__ = "2.2.10" +__version__ = "2.3.0" From bb6adf8a82508bbf0159875a607f473dced0b4ab Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 19:31:07 -0600 Subject: [PATCH 16/35] bump version 2.3.0 --- pyensembl/ensembl_url_templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyensembl/ensembl_url_templates.py b/pyensembl/ensembl_url_templates.py index 298e517..a3783b2 100644 --- a/pyensembl/ensembl_url_templates.py +++ b/pyensembl/ensembl_url_templates.py @@ -36,7 +36,7 @@ GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/gtf/%(species)s/" DATABASE_FASTA_SUBDIR_TEMPLATE = ( - "/pub/release-%(release)d/$(database)s/fasta/%(species)s/%(type)s/" + "/pub/release-%(release)d/%(database)s/fasta/%(species)s/%(type)s/" ) DATABASE_GTF_SUBDIR_TEMPLATE = ( "/pub/release-%(release)d/%(database)s/gtf/%(species)s/" From aceabe007f48b6c97f3bf6fe65d66ccb341a924f Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 19:49:24 -0600 Subject: [PATCH 17/35] fix bug in fasta name --- pyensembl/ensembl_url_templates.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyensembl/ensembl_url_templates.py b/pyensembl/ensembl_url_templates.py index a3783b2..dc2b4da 100644 --- a/pyensembl/ensembl_url_templates.py +++ b/pyensembl/ensembl_url_templates.py @@ -114,7 +114,6 @@ def make_gtf_url(ensembl_release, species, server=None, database=None): "species": species, } else: - print(ensembl_release, species, database) subdir = DATABASE_GTF_SUBDIR_TEMPLATE % { "release": ensembl_release, "database": database, @@ -126,11 +125,16 @@ def make_gtf_url(ensembl_release, species, server=None, database=None): return server + subdir + filename -def make_fasta_filename(ensembl_release, species, sequence_type): +def make_fasta_filename(ensembl_release, species, database, sequence_type): ensembl_release, species, reference_name = normalize_release_properties( ensembl_release, species ) - if ensembl_release <= 75: + # for plant database, start from release 32 (inlcude 32) , the fasta file use the "old name" + # for releses before 31, the fasta file use the "new name" + # version 31 use both old and new name + if (ensembl_release <= 75 and database is None) or ( + ensembl_release <= 31 and database is not None + ): if sequence_type == "ncrna": return OLD_FASTA_FILENAME_TEMPLATE_NCRNA % { "Species": species.capitalize(), @@ -198,6 +202,7 @@ def make_fasta_url( filename = make_fasta_filename( ensembl_release=ensembl_release, species=species, + database=database, sequence_type=sequence_type, ) return server + subdir + filename From e77c9b1712ac63fdee32c64ecdce3f674ab9d255 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 19:54:22 -0600 Subject: [PATCH 
18/35] support arabidopsis --- pyensembl/species.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pyensembl/species.py b/pyensembl/species.py index 4249234..0998e5e 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -33,7 +33,9 @@ class Species(Serializable): _reference_names_to_species = {} @classmethod - def register(cls, latin_name, synonyms, reference_assemblies, database=None): + def register( + cls, latin_name, synonyms, reference_assemblies, database=None + ): """ Create a Species object from the given arguments and enter into all the dicts used to look the species up by its fields. @@ -84,7 +86,9 @@ def all_species_release_pairs(cls): for release in range(release_range[0], release_range[1] + 1): yield species_name, release - def __init__(self, latin_name, synonyms=[], reference_assemblies={}, database=None): + def __init__( + self, latin_name, synonyms=[], reference_assemblies={}, database=None + ): """ Parameters ---------- @@ -105,7 +109,8 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}, database=No for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" % i + "Ensembl release %d already has an associated genome" + % i ) self._release_to_genome[i] = genome_name @@ -371,3 +376,13 @@ def check_species_object(species_name_or_object): }, database="plants", ) + + +cress = Species.register( + latin_name="arabidopsis_thaliana", + synonyms=["cress", "thale_cress"], + reference_assemblies={ + "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE), + }, + database="plants", +) From 247f9cf7d18679ae31b5baf91285a7875b3bfb73 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 20:55:46 -0600 Subject: [PATCH 19/35] update check --- Makefile | 4 ++-- lint.sh | 15 --------------- pyensembl/shell.py | 0 requirements.txt | 2 +- {test => tests}/__init__.py | 0 {test => tests}/common.py | 0 {test => tests}/data.py | 0 {test => tests}/data/gencode.ucsc.small.gtf | 0 ...mouse.ensembl.81.partial.ENSMUSG00000017167.fa | 0 ...ouse.ensembl.81.partial.ENSMUSG00000017167.gtf | 0 ...ouse.ensembl.81.partial.ENSMUSG00000017167.pep | 0 ...ensembl.81.partial.ncrna.ENSMUSG00000017167.fa | 0 {test => tests}/data/refseq.ucsc.small.gtf | 0 {test => tests}/test_contigs.py | 0 {test => tests}/test_download_cache.py | 0 {test => tests}/test_ensembl_gtf.py | 0 {test => tests}/test_ensembl_object_properties.py | 0 {test => tests}/test_exon_id.py | 0 {test => tests}/test_exon_object.py | 0 {test => tests}/test_gene_ids.py | 0 {test => tests}/test_gene_names.py | 0 {test => tests}/test_gene_objects.py | 0 {test => tests}/test_id_length.py | 0 {test => tests}/test_locus.py | 0 {test => tests}/test_missing_genome_sources.py | 0 {test => tests}/test_mouse.py | 0 {test => tests}/test_release_versions.py | 0 {test => tests}/test_search.py | 0 {test => tests}/test_sequence_data.py | 0 {test => tests}/test_serialization.py | 0 {test => tests}/test_shell.py | 0 {test => tests}/test_string_representation.py | 0 {test => tests}/test_timings.py | 0 {test => tests}/test_transcript_ids.py | 0 {test => tests}/test_transcript_objects.py | 0 {test => tests}/test_transcript_sequences.py | 0 {test => tests}/test_transcript_support_level.py | 0 {test => tests}/test_ucsc_gtf.py | 0 38 files changed, 3 insertions(+), 18 deletions(-) delete mode 100755 lint.sh mode change 100755 => 100644 pyensembl/shell.py rename {test => tests}/__init__.py (100%) rename {test => tests}/common.py (100%) 
rename {test => tests}/data.py (100%) rename {test => tests}/data/gencode.ucsc.small.gtf (100%) rename {test => tests}/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa (100%) rename {test => tests}/data/mouse.ensembl.81.partial.ENSMUSG00000017167.gtf (100%) rename {test => tests}/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep (100%) rename {test => tests}/data/mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa (100%) rename {test => tests}/data/refseq.ucsc.small.gtf (100%) rename {test => tests}/test_contigs.py (100%) rename {test => tests}/test_download_cache.py (100%) rename {test => tests}/test_ensembl_gtf.py (100%) rename {test => tests}/test_ensembl_object_properties.py (100%) rename {test => tests}/test_exon_id.py (100%) rename {test => tests}/test_exon_object.py (100%) rename {test => tests}/test_gene_ids.py (100%) rename {test => tests}/test_gene_names.py (100%) rename {test => tests}/test_gene_objects.py (100%) rename {test => tests}/test_id_length.py (100%) rename {test => tests}/test_locus.py (100%) rename {test => tests}/test_missing_genome_sources.py (100%) rename {test => tests}/test_mouse.py (100%) rename {test => tests}/test_release_versions.py (100%) rename {test => tests}/test_search.py (100%) rename {test => tests}/test_sequence_data.py (100%) rename {test => tests}/test_serialization.py (100%) rename {test => tests}/test_shell.py (100%) rename {test => tests}/test_string_representation.py (100%) rename {test => tests}/test_timings.py (100%) rename {test => tests}/test_transcript_ids.py (100%) rename {test => tests}/test_transcript_objects.py (100%) rename {test => tests}/test_transcript_sequences.py (100%) rename {test => tests}/test_transcript_support_level.py (100%) rename {test => tests}/test_ucsc_gtf.py (100%) diff --git a/Makefile b/Makefile index efc8962..d9844c6 100644 --- a/Makefile +++ b/Makefile @@ -13,8 +13,8 @@ PYTHON3 ?= python3 all: check check: - ./lint.sh - cd test && pytest + find pyensembl -name '*.py' | xargs pylint --errors-only --disable=unsubscriptable-object,not-an-iterable,no-member && echo 'Passes pylint check' + pytest --cov=pyensembl/ --cov-report=term-missing tests #: Clean up temporary files clean: diff --git a/lint.sh b/lint.sh deleted file mode 100755 index 9bac0e0..0000000 --- a/lint.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -set -o errexit - - -# disabling several categories of errors due to false positives in pylint, -# see these issues: -# - https://bitbucket.org/logilab/pylint/issues/701/false-positives-with-not-an-iterable-and -# - https://bitbucket.org/logilab/pylint/issues/58 - -find pyensembl -name '*.py' \ - | xargs pylint \ - --errors-only \ - --disable=print-statement,unsubscriptable-object,not-an-iterable,no-member - -echo 'Passes pylint check' diff --git a/pyensembl/shell.py b/pyensembl/shell.py old mode 100755 new mode 100644 diff --git a/requirements.txt b/requirements.txt index 88f4b5e..03c7e70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ typechecks>=0.0.2 datacache>=1.1.4 memoized-property>=1.0.2 tinytimer -gtfparse>=1.3.0,<2.0.0 +gtfparse>=2.1.0 serializable nose>=1.3.3 pylint>=1.4.4 diff --git a/test/__init__.py b/tests/__init__.py similarity index 100% rename from test/__init__.py rename to tests/__init__.py diff --git a/test/common.py b/tests/common.py similarity index 100% rename from test/common.py rename to tests/common.py diff --git a/test/data.py b/tests/data.py similarity index 100% rename from test/data.py rename to tests/data.py diff --git 
a/test/data/gencode.ucsc.small.gtf b/tests/data/gencode.ucsc.small.gtf similarity index 100% rename from test/data/gencode.ucsc.small.gtf rename to tests/data/gencode.ucsc.small.gtf diff --git a/test/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa b/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa similarity index 100% rename from test/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa rename to tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa diff --git a/test/data/mouse.ensembl.81.partial.ENSMUSG00000017167.gtf b/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.gtf similarity index 100% rename from test/data/mouse.ensembl.81.partial.ENSMUSG00000017167.gtf rename to tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.gtf diff --git a/test/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep b/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep similarity index 100% rename from test/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep rename to tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep diff --git a/test/data/mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa b/tests/data/mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa similarity index 100% rename from test/data/mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa rename to tests/data/mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa diff --git a/test/data/refseq.ucsc.small.gtf b/tests/data/refseq.ucsc.small.gtf similarity index 100% rename from test/data/refseq.ucsc.small.gtf rename to tests/data/refseq.ucsc.small.gtf diff --git a/test/test_contigs.py b/tests/test_contigs.py similarity index 100% rename from test/test_contigs.py rename to tests/test_contigs.py diff --git a/test/test_download_cache.py b/tests/test_download_cache.py similarity index 100% rename from test/test_download_cache.py rename to tests/test_download_cache.py diff --git a/test/test_ensembl_gtf.py b/tests/test_ensembl_gtf.py similarity index 100% rename from test/test_ensembl_gtf.py rename to tests/test_ensembl_gtf.py diff --git a/test/test_ensembl_object_properties.py b/tests/test_ensembl_object_properties.py similarity index 100% rename from test/test_ensembl_object_properties.py rename to tests/test_ensembl_object_properties.py diff --git a/test/test_exon_id.py b/tests/test_exon_id.py similarity index 100% rename from test/test_exon_id.py rename to tests/test_exon_id.py diff --git a/test/test_exon_object.py b/tests/test_exon_object.py similarity index 100% rename from test/test_exon_object.py rename to tests/test_exon_object.py diff --git a/test/test_gene_ids.py b/tests/test_gene_ids.py similarity index 100% rename from test/test_gene_ids.py rename to tests/test_gene_ids.py diff --git a/test/test_gene_names.py b/tests/test_gene_names.py similarity index 100% rename from test/test_gene_names.py rename to tests/test_gene_names.py diff --git a/test/test_gene_objects.py b/tests/test_gene_objects.py similarity index 100% rename from test/test_gene_objects.py rename to tests/test_gene_objects.py diff --git a/test/test_id_length.py b/tests/test_id_length.py similarity index 100% rename from test/test_id_length.py rename to tests/test_id_length.py diff --git a/test/test_locus.py b/tests/test_locus.py similarity index 100% rename from test/test_locus.py rename to tests/test_locus.py diff --git a/test/test_missing_genome_sources.py b/tests/test_missing_genome_sources.py similarity index 100% rename from test/test_missing_genome_sources.py rename to tests/test_missing_genome_sources.py diff --git 
a/test/test_mouse.py b/tests/test_mouse.py similarity index 100% rename from test/test_mouse.py rename to tests/test_mouse.py diff --git a/test/test_release_versions.py b/tests/test_release_versions.py similarity index 100% rename from test/test_release_versions.py rename to tests/test_release_versions.py diff --git a/test/test_search.py b/tests/test_search.py similarity index 100% rename from test/test_search.py rename to tests/test_search.py diff --git a/test/test_sequence_data.py b/tests/test_sequence_data.py similarity index 100% rename from test/test_sequence_data.py rename to tests/test_sequence_data.py diff --git a/test/test_serialization.py b/tests/test_serialization.py similarity index 100% rename from test/test_serialization.py rename to tests/test_serialization.py diff --git a/test/test_shell.py b/tests/test_shell.py similarity index 100% rename from test/test_shell.py rename to tests/test_shell.py diff --git a/test/test_string_representation.py b/tests/test_string_representation.py similarity index 100% rename from test/test_string_representation.py rename to tests/test_string_representation.py diff --git a/test/test_timings.py b/tests/test_timings.py similarity index 100% rename from test/test_timings.py rename to tests/test_timings.py diff --git a/test/test_transcript_ids.py b/tests/test_transcript_ids.py similarity index 100% rename from test/test_transcript_ids.py rename to tests/test_transcript_ids.py diff --git a/test/test_transcript_objects.py b/tests/test_transcript_objects.py similarity index 100% rename from test/test_transcript_objects.py rename to tests/test_transcript_objects.py diff --git a/test/test_transcript_sequences.py b/tests/test_transcript_sequences.py similarity index 100% rename from test/test_transcript_sequences.py rename to tests/test_transcript_sequences.py diff --git a/test/test_transcript_support_level.py b/tests/test_transcript_support_level.py similarity index 100% rename from test/test_transcript_support_level.py rename to tests/test_transcript_support_level.py diff --git a/test/test_ucsc_gtf.py b/tests/test_ucsc_gtf.py similarity index 100% rename from test/test_ucsc_gtf.py rename to tests/test_ucsc_gtf.py From 82b52bc9f3951c1eb232b76a18881f251e857f0a Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 20:57:48 -0600 Subject: [PATCH 20/35] format code --- pyensembl/download_cache.py | 19 ++++++------------- pyensembl/ensembl_url_templates.py | 20 +++++--------------- pyensembl/species.py | 11 +++-------- setup.py | 1 + 4 files changed, 15 insertions(+), 36 deletions(-) diff --git a/pyensembl/download_cache.py b/pyensembl/download_cache.py index 48ebd00..47a0766 100644 --- a/pyensembl/download_cache.py +++ b/pyensembl/download_cache.py @@ -151,10 +151,7 @@ def _fields(self): ) def __eq__(self, other): - return ( - other.__class__ is DownloadCache - and self._fields() == other._fields() - ) + return other.__class__ is DownloadCache and self._fields() == other._fields() def __hash__(self): return hash(self._fields()) @@ -216,14 +213,10 @@ def cached_path(self, path_or_url): # if we expect the download function to decompress this file then # we should use its name without the compression extension if self.decompress_on_download: - local_filename = self._remove_compression_suffix_if_present( - local_filename - ) + local_filename = self._remove_compression_suffix_if_present(local_filename) if len(local_filename) == 0: - raise ValueError( - "Can't determine local filename for %s" % (path_or_url,) - ) + raise ValueError("Can't determine local 
filename for %s" % (path_or_url,)) return join(self.cache_directory_path, local_filename) @@ -326,9 +319,9 @@ def delete_cached_files(self, prefixes=[], suffixes=[]): """ if isdir(self.cache_directory_path): for filename in listdir(): - delete = any( - [filename.endswith(ext) for ext in suffixes] - ) or any([filename.startswith(pre) for pre in prefixes]) + delete = any([filename.endswith(ext) for ext in suffixes]) or any( + [filename.startswith(pre) for pre in prefixes] + ) if delete: path = join(self.cache_directory_path, filename) logger.info("Deleting %s", path) diff --git a/pyensembl/ensembl_url_templates.py b/pyensembl/ensembl_url_templates.py index dc2b4da..4fcf774 100644 --- a/pyensembl/ensembl_url_templates.py +++ b/pyensembl/ensembl_url_templates.py @@ -38,9 +38,7 @@ DATABASE_FASTA_SUBDIR_TEMPLATE = ( "/pub/release-%(release)d/%(database)s/fasta/%(species)s/%(type)s/" ) -DATABASE_GTF_SUBDIR_TEMPLATE = ( - "/pub/release-%(release)d/%(database)s/gtf/%(species)s/" -) +DATABASE_GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/%(database)s/gtf/%(species)s/" # GTF annotation file example: Homo_sapiens.GTCh38.gtf.gz GTF_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(release)d.gtf.gz" @@ -54,15 +52,11 @@ # ncRNA FASTA file for releases before (and including) Ensembl 75 # example: Homo_sapiens.NCBI36.54.ncrna.fa.gz -OLD_FASTA_FILENAME_TEMPLATE_NCRNA = ( - "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" -) +OLD_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" # cDNA & protein FASTA file for releases after Ensembl 75 # example: Homo_sapiens.GRCh37.cdna.all.fa.gz -NEW_FASTA_FILENAME_TEMPLATE = ( - "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" -) +NEW_FASTA_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" # ncRNA FASTA file for releases after Ensembl 75 # example: Homo_sapiens.GRCh37.ncrna.fa.gz @@ -105,9 +99,7 @@ def make_gtf_url(ensembl_release, species, server=None, database=None): server = ENSEMBL_FTP_SERVER else: server = ENSEMBLGENOME_FTP_SERVER - ensembl_release, species, _ = normalize_release_properties( - ensembl_release, species - ) + ensembl_release, species, _ = normalize_release_properties(ensembl_release, species) if database is None: subdir = GTF_SUBDIR_TEMPLATE % { "release": ensembl_release, @@ -119,9 +111,7 @@ def make_gtf_url(ensembl_release, species, server=None, database=None): "database": database, "species": species, } - filename = make_gtf_filename( - ensembl_release=ensembl_release, species=species - ) + filename = make_gtf_filename(ensembl_release=ensembl_release, species=species) return server + subdir + filename diff --git a/pyensembl/species.py b/pyensembl/species.py index 0998e5e..1be8180 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -33,9 +33,7 @@ class Species(Serializable): _reference_names_to_species = {} @classmethod - def register( - cls, latin_name, synonyms, reference_assemblies, database=None - ): + def register(cls, latin_name, synonyms, reference_assemblies, database=None): """ Create a Species object from the given arguments and enter into all the dicts used to look the species up by its fields. 
@@ -86,9 +84,7 @@ def all_species_release_pairs(cls): for release in range(release_range[0], release_range[1] + 1): yield species_name, release - def __init__( - self, latin_name, synonyms=[], reference_assemblies={}, database=None - ): + def __init__(self, latin_name, synonyms=[], reference_assemblies={}, database=None): """ Parameters ---------- @@ -109,8 +105,7 @@ def __init__( for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" - % i + "Ensembl release %d already has an associated genome" % i ) self._release_to_genome[i] = genome_name diff --git a/setup.py b/setup.py index 45dc0a4..65dee28 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,7 @@ # limitations under the License. from __future__ import print_function + import os import re From 6b6d8db5a357aa6028a00d304c6c46d671b7dc75 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 21:33:36 -0600 Subject: [PATCH 21/35] update config --- pyensembl/config.py | 163 ++++++++++++++ pyensembl/ensembl_release.py | 11 +- pyensembl/ensembl_release_versions.py | 26 ++- pyensembl/shell.py | 16 +- pyensembl/species.py | 205 ++---------------- ...e.ensembl.81.partial.ENSMUSG00000017167.db | Bin 0 -> 249856 bytes ...bl.81.partial.ENSMUSG00000017167.fa.pickle | Bin 0 -> 3736 bytes ...l.81.partial.ENSMUSG00000017167.pep.pickle | Bin 0 -> 2850 bytes 8 files changed, 210 insertions(+), 211 deletions(-) create mode 100644 pyensembl/config.py create mode 100644 tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.db create mode 100644 tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa.pickle create mode 100644 tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep.pickle diff --git a/pyensembl/config.py b/pyensembl/config.py new file mode 100644 index 0000000..3dfd54a --- /dev/null +++ b/pyensembl/config.py @@ -0,0 +1,163 @@ +# TODO: save the config in YMAL file, or TOML file? 
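For orientation, the SPECIES_DATA list defined below is plain data; a loop added to species.py later in this patch is expected to feed each entry to Species.register and bind the first synonym as a module-level name, so existing imports keep working. A hedged sketch of that mapping, assuming the first entry below:

    # illustrative only: how one entry below maps onto the registry
    entry = SPECIES_DATA[0]  # the homo_sapiens dict
    assert entry["latin_name"] == "homo_sapiens"
    assert entry["reference_assemblies"]["GRCh38"] == (76, MAX_ENSEMBL_RELEASE)
    # species.py then does, roughly:
    #   globals()[entry["synonyms"][0]] = Species.register(**entry)
    # so `from pyensembl.species import human` continues to resolve.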
+ +MIN_ENSEMBL_RELEASE = 54 +MAX_ENSEMBL_RELEASE = 110 +MIN_ENSEMBLGENOME_RELEASE = 50 +MAX_ENSEMBLGENOME_RELEASE = 57 + + +SPECIES_DATA = [ + { + "latin_name": "homo_sapiens", + "synonyms": ["human"], + "reference_assemblies": { + "NCBI36": (54, 54), + "GRCh37": (55, 75), + "GRCh38": (76, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "mus_musculus", + "synonyms": ["mouse", "house mouse"], + "reference_assemblies": { + "NCBIM37": (54, 67), + "GRCm38": (68, 102), + "GRCm39": (103, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "canis_familiaris", + "synonyms": ["dog"], + "reference_assemblies": {"CanFam3.1": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "felis_catus", + "synonyms": ["cat"], + "reference_assemblies": { + "Felis_catus_6.2": (75, 90), + "Felis_catus_8.0": (91, 92), + "Felis_catus_9.0": (93, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "gallus_gallus", + "synonyms": ["chicken"], + "reference_assemblies": { + "Galgal4": (75, 85), + "Gallus_gallus-5.0": (86, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "rattus_norvegicus", + "synonyms": ["rat", "brown_rat", "lab_rat"], + "reference_assemblies": { + "Rnor_5.0": (75, 79), + "Rnor_6.0": (80, 104), + "mRatBN7.2": (105, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "macaca_fascicularis", + "synonyms": ["macaque", "Crab-eating_macaque"], + "reference_assemblies": { + "Macaca_fascicularis_6.0": (103, MAX_ENSEMBL_RELEASE) + }, + }, + { + "latin_name": "chlorocebus_sabaeus", + "synonyms": ["green_monkey", "african_green_monkey"], + "reference_assemblies": {"ChlSab1.1": (86, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "macaca_mulatta", + "synonyms": ["rhesus"], + "reference_assemblies": {"Mmul_10": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "oryctolagus_cuniculus", + "synonyms": ["rabbit"], + "reference_assemblies": {"OryCun2.0": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "meriones_unguiculatus", + "synonyms": ["gerbil"], + "reference_assemblies": {"MunDraft-v1.0": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "mesocricetus_auratus", + "synonyms": ["syrian_hamster"], + "reference_assemblies": {"MesAur1.0": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "cricetulus_griseus_chok1gshd", + "synonyms": ["chinese_hamster"], + "reference_assemblies": {"CHOK1GS_HDv1": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "heterocephalus_glaber_female", + "synonyms": ["naked_mole_rat"], + "reference_assemblies": { + "HetGla_female_1.0": (75, MAX_ENSEMBL_RELEASE) + }, + }, + { + "latin_name": "cavia_porcellus", + "synonyms": ["guinea_pig"], + "reference_assemblies": {"Cavpor3.0": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "sus_scrofa", + "synonyms": ["pig"], + "reference_assemblies": {"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "danio_rerio", + "synonyms": ["zebrafish"], + "reference_assemblies": { + "Zv8": (54, 59), + "Zv9": (60, 79), + "GRCz10": (80, 91), + "GRCz11": (92, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "drosophila_melanogaster", + "synonyms": ["drosophila", "fruit fly", "fly"], + "reference_assemblies": { + "BDGP5": (75, 78), + "BDGP6": (79, 95), + "BDGP6.22": (96, 98), + "BDGP6.28": (99, 102), + "BDGP6.32": (103, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "caenorhabditis_elegans", + "synonyms": ["nematode", "C_elegans"], + "reference_assemblies": { + "WS200": (55, 57), + "WS210": (58, 60), + "WS220": (61, 66), + "WBcel235": (67, MAX_ENSEMBL_RELEASE), + }, + }, + { + "latin_name": "saccharomyces_cerevisiae", + 
"synonyms": ["yeast", "budding_yeast"], + "reference_assemblies": {"R64-1-1": (75, MAX_ENSEMBL_RELEASE)}, + }, + { + "latin_name": "oryza_sativa", + "synonyms": ["rice", "japanese_rice"], + "reference_assemblies": { + "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), + }, + }, + { + "latin_name": "arabidopsis_thaliana", + "synonyms": ["cress", "thale_cress"], + "reference_assemblies": { + "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), + }, + }, +] diff --git a/pyensembl/ensembl_release.py b/pyensembl/ensembl_release.py index 8af2584..521e2d7 100644 --- a/pyensembl/ensembl_release.py +++ b/pyensembl/ensembl_release.py @@ -16,12 +16,9 @@ """ from weakref import WeakValueDictionary -from .ensembl_release_versions import MAX_ENSEMBL_RELEASE, check_release_number -from .ensembl_url_templates import ( - ENSEMBL_FTP_SERVER, - make_fasta_url, - make_gtf_url, -) +from .config import MAX_ENSEMBL_RELEASE # ENSEMBL_FTP_SERVER, +from .ensembl_release_versions import check_release_number +from .ensembl_url_templates import make_fasta_url, make_gtf_url from .genome import Genome from .species import check_species_object, human @@ -72,7 +69,7 @@ def __init__( release=MAX_ENSEMBL_RELEASE, species=human, server=None, - # ENSEMBL_FTP_SERVER, + # server=EMBL_FTP_SERVER,, ): self.release, self.species, self.server = self.normalize_init_values( release=release, species=species, server=server diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py index 020c6e6..05d4a15 100644 --- a/pyensembl/ensembl_release_versions.py +++ b/pyensembl/ensembl_release_versions.py @@ -10,25 +10,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -MIN_ENSEMBL_RELEASE = 54 -MAX_ENSEMBL_RELEASE = 110 -MIN_ENSEMBLGENOME_RELEASE = 50 -MAX_ENSEMBLGENOME_RELEASE = 57 +from .config import ( + MAX_ENSEMBL_RELEASE, + MAX_ENSEMBLGENOME_RELEASE, + MIN_ENSEMBL_RELEASE, + MIN_ENSEMBLGENOME_RELEASE, +) -def check_release_number(release): +def check_release_number(release, database=None): """ - Check to make sure a release is in the valid range of - Ensembl releases. + Check to make sure a release is in the valid range of Ensembl releases. 
""" try: release = int(release) - except: + except ValueError: raise ValueError("Invalid Ensembl release: %s" % release) - - if release < MIN_ENSEMBL_RELEASE: + if database is None: + min_release = MIN_ENSEMBL_RELEASE + else: + min_release = MIN_ENSEMBLGENOME_RELEASE + if release < min_release: raise ValueError( "Invalid Ensembl releases %d, must be greater than %d" - % (release, MIN_ENSEMBL_RELEASE) + % (release, min_release) ) return release diff --git a/pyensembl/shell.py b/pyensembl/shell.py index 0a7a54f..546dfa9 100644 --- a/pyensembl/shell.py +++ b/pyensembl/shell.py @@ -47,11 +47,14 @@ import pkg_resources -from .ensembl_release import MAX_ENSEMBL_RELEASE, EnsemblRelease +from .config import MAX_ENSEMBL_RELEASE +from .ensembl_release import EnsemblRelease from .genome import Genome from .species import Species, normalize_species_name -logging.config.fileConfig(pkg_resources.resource_filename(__name__, "logging.conf")) +logging.config.fileConfig( + pkg_resources.resource_filename(__name__, "logging.conf") +) logger = logging.getLogger(__name__) @@ -162,7 +165,14 @@ def collect_all_available_ensembl_releases(): for species_name in Species.all_registered_latin_names(): species = Species._latin_names_to_species[species_name] # print in tree format - print("* " + species_name + " (" + ", ".join(species.synonyms) + ")" + ":") + print( + "* " + + species_name + + " (" + + ", ".join(species.synonyms) + + ")" + + ":" + ) for ( release_name, release_range, diff --git a/pyensembl/species.py b/pyensembl/species.py index 1be8180..fe8f3b0 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -12,10 +12,7 @@ from serializable import Serializable -from .ensembl_release_versions import ( - MAX_ENSEMBL_RELEASE, - MAX_ENSEMBLGENOME_RELEASE, -) +from .config import SPECIES_DATA # TODO: replace Serializable with data class @@ -33,7 +30,9 @@ class Species(Serializable): _reference_names_to_species = {} @classmethod - def register(cls, latin_name, synonyms, reference_assemblies, database=None): + def register( + cls, latin_name, synonyms, reference_assemblies, database=None + ): """ Create a Species object from the given arguments and enter into all the dicts used to look the species up by its fields. 
@@ -84,7 +83,9 @@ def all_species_release_pairs(cls): for release in range(release_range[0], release_range[1] + 1): yield species_name, release - def __init__(self, latin_name, synonyms=[], reference_assemblies={}, database=None): + def __init__( + self, latin_name, synonyms=[], reference_assemblies={}, database=None + ): """ Parameters ---------- @@ -105,7 +106,8 @@ def __init__(self, latin_name, synonyms=[], reference_assemblies={}, database=No for i in range(start, end + 1): if i in self._release_to_genome: raise ValueError( - "Ensembl release %d already has an associated genome" % i + "Ensembl release %d already has an associated genome" + % i ) self._release_to_genome[i] = genome_name @@ -198,186 +200,9 @@ def check_species_object(species_name_or_object): ) -human = Species.register( - latin_name="homo_sapiens", - synonyms=["human"], - reference_assemblies={ - "NCBI36": (54, 54), - "GRCh37": (55, 75), - "GRCh38": (76, MAX_ENSEMBL_RELEASE), - }, -) - -mouse = Species.register( - latin_name="mus_musculus", - synonyms=["mouse", "house mouse"], - reference_assemblies={ - "NCBIM37": (54, 67), - "GRCm38": (68, 102), - "GRCm39": (103, MAX_ENSEMBL_RELEASE), - }, -) - -dog = Species.register( - latin_name="canis_familiaris", - synonyms=["dog"], - reference_assemblies={"CanFam3.1": (75, MAX_ENSEMBL_RELEASE)}, -) - -cat = Species.register( - latin_name="felis_catus", - synonyms=["cat"], - reference_assemblies={ - "Felis_catus_6.2": (75, 90), - "Felis_catus_8.0": (91, 92), - "Felis_catus_9.0": (93, MAX_ENSEMBL_RELEASE), - }, -) - -chicken = Species.register( - latin_name="gallus_gallus", - synonyms=["chicken"], - reference_assemblies={ - "Galgal4": (75, 85), - "Gallus_gallus-5.0": (86, MAX_ENSEMBL_RELEASE), - }, -) - -# Does the black rat (Rattus Rattus) get used for research too? 
-brown_rat = Species.register( - latin_name="rattus_norvegicus", - synonyms=["brown rat", "lab rat", "rat"], - reference_assemblies={ - "Rnor_5.0": (75, 79), - "Rnor_6.0": (80, 104), - "mRatBN7.2": (105, MAX_ENSEMBL_RELEASE), - }, -) - -macaque = Species.register( - latin_name="macaca_fascicularis", - synonyms=["macaque", "Crab-eating macaque"], - reference_assemblies={ - "Macaca_fascicularis_6.0": (103, MAX_ENSEMBL_RELEASE), - }, -) - -green_monkey = Species.register( - latin_name="chlorocebus_sabaeus", - synonyms=["green_monkey", "african_green_monkey"], - reference_assemblies={ - "ChlSab1.1": (86, MAX_ENSEMBL_RELEASE), - }, -) - -rhesus = Species.register( - latin_name="macaca_mulatta", - synonyms=["rhesus"], - reference_assemblies={"Mmul_10": (75, MAX_ENSEMBL_RELEASE)}, -) - -rabbit = Species.register( - latin_name="oryctolagus_cuniculus", - synonyms=["rabbit"], - reference_assemblies={"OryCun2.0": (75, MAX_ENSEMBL_RELEASE)}, -) - -gerbil = Species.register( - latin_name="meriones_unguiculatus", - synonyms=["gerbil"], - reference_assemblies={"MunDraft-v1.0": (75, MAX_ENSEMBL_RELEASE)}, -) - -syrian_hamster = Species.register( - latin_name="mesocricetus_auratus", - synonyms=["syrian_hamster"], - reference_assemblies={"MesAur1.0": (75, MAX_ENSEMBL_RELEASE)}, -) - -chinese_hamster = Species.register( - latin_name="cricetulus_griseus_chok1gshd", - synonyms=["chinese_hamster"], - reference_assemblies={"CHOK1GS_HDv1": (75, MAX_ENSEMBL_RELEASE)}, -) - -naked_mole_rat = Species.register( - latin_name="heterocephalus_glaber_female", - synonyms=["naked_mole_rat"], - reference_assemblies={"HetGla_female_1.0": (75, MAX_ENSEMBL_RELEASE)}, -) - -guinea_pig = Species.register( - latin_name="cavia_porcellus", - synonyms=["guinea_pig"], - reference_assemblies={"Cavpor3.0": (75, MAX_ENSEMBL_RELEASE)}, -) - -pig = Species.register( - latin_name="sus_scrofa", - synonyms=["pig"], - reference_assemblies={"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)}, -) - -zebrafish = Species.register( - latin_name="danio_rerio", - synonyms=["zebrafish"], - reference_assemblies={ - # "ZFISH7": (47, 53), - "Zv8": (54, 59), - "Zv9": (60, 79), - "GRCz10": (80, 91), - "GRCz11": (92, MAX_ENSEMBL_RELEASE), - }, -) - -fly = Species.register( - latin_name="drosophila_melanogaster", - synonyms=["drosophila", "fruit fly", "fly"], - reference_assemblies={ - "BDGP5": (75, 78), - "BDGP6": (79, 95), - "BDGP6.22": (96, 98), - "BDGP6.28": (99, 102), - "BDGP6.32": (103, MAX_ENSEMBL_RELEASE), - }, -) - -nematode = Species.register( - latin_name="caenorhabditis_elegans", - synonyms=["nematode", "C_elegans"], - reference_assemblies={ - # "WS180": (47, 49), - # "WS190": (50, 54), - "WS200": (55, 57), - "WS210": (58, 60), - "WS220": (61, 66), - "WBcel235": (67, MAX_ENSEMBL_RELEASE), - }, -) - -yeast = Species.register( - latin_name="saccharomyces_cerevisiae", - synonyms=["yeast", "budding_yeast"], - reference_assemblies={ - "R64-1-1": (75, MAX_ENSEMBL_RELEASE), - }, -) - -rice = Species.register( - latin_name="oryza_sativa", - synonyms=["rice", "japanese_rice"], - reference_assemblies={ - "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), - }, - database="plants", -) - - -cress = Species.register( - latin_name="arabidopsis_thaliana", - synonyms=["cress", "thale_cress"], - reference_assemblies={ - "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE), - }, - database="plants", -) +for data in SPECIES_DATA: + globals()[data["synonyms"][0]] = Species.register( + latin_name=data["latin_name"], + synonyms=data["synonyms"], + 
reference_assemblies=data["reference_assemblies"], + ) diff --git a/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.db b/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.db new file mode 100644 index 0000000000000000000000000000000000000000..e2eba44f73986fcae19abbdd0794bdc1a8aaee76 GIT binary patch literal 249856 zcmeI5eRv$jo#*GZdqz@s>BXawttB2K8{9F$>KTn>U6#u_vLIfJjgkGtJaCzerLjGG z@<^7(<|Wy?)gJ$teQsIECJ*}%?l!Rdc%QxGF5L5E@Ak3|**pigFCi>}i!olX{lFdA zfL{RP%VXH*s=9mBzpm+#GJQYLl)qPQj{Y#ZKJv53-N8SPTp#QX9FPi}>Qs|!mj!27u{2h=w>*_E zO^;3$C(Ai2p4(X{6>_Eg?t;4~IdsSH+AYJW^&8g>Z%etloFlbqBiWl;Ia`!5(~?k= zo4L`9ztQQ!9$_*!UCvLHbA{5FYov2-<5f9jC!7Q~q^z=UNb$mr2k!4slgpL`_kW?B z-#$)i>Y82TnBB7WiyMYLwQ|$s_7$e4ixZ{Pmf>w%QX4n%zpWcKa7z<=r$!61O7e-i z%DZP_$vX=7PLyg{B4rlG?2KDCuHU?MICc9S>u+0o$DOHLhwr4G3BLR|dc3vS%VsTq zwihSL_f1y$IxXxv_S|X{XKLiLs#0;(&A6kGFYleIvNAe4Hl3?Z%@3ed1FP85-rd^^ zQ*tecPt$uRCnxyXHD1_P7?;yowXJbDt$o#gOioRd3&oO;hm}3P)8Ae#BwZ4JRhs)#Mj?f-}u_F6%NGofzY1hMmn}WnVwmjt-Cf3H>)e%fP9tKsZ63Ro6Hl(J zqZ>0ZrY1M>+ODcmTlY{{Z6h(=I*(~7XH~tW{l-j4SChALS9<1m#lxO0+!1~<%Or>! z#W7=2oBIejmjuC}&ttuE5y^HP-PqTnCWm;fJ8IP0N+;S)VpCg)O{=Z-wJP=)Gp)^P za)cWmsA0IOgCP#I%;P{+-YV?Az0+?ssmY$6;Qnr_cOv#ku-P$f_fKR;Y9%?8zAext z?x5Uu-||{&%N-!T?Q@M@1-%jf&fzwnGP=qh-*dqji7QDn5wv?B`(hMd3v*64Ae1K0 zSzet+oYX>f`;NO~N^&3({F-%1kqi}Aieo~I*DBXd<`(U0#)#_gveTH0qGPp+CLMw8 za-|0tSNON$MCDsC@tIsrqQMY*)LNxWXG_B(sU&J+c|AewFG(Ynhl$V%I6&OxGhJ0E#vheCHeV8 zt?%jL9Lt$}%SYGUEBIz{hsL)-_GEzn!v_RF00ck)1V8`;KmY_l00ck)1VG@@5x6VZ zsTo?grbXHerfCT+qG`<3v_fgRuzUM>ZdZO^zLXy-l}q_aQ&A7zWtx@S2mGO0v9xoQ z822l&mUaEZ8%J*2I&w>|^)Uy`{(&Z5uOK@a;Q#Oe0T2KI5C8!X009sH0T2KI5C8!X zxZ(*U!e+eI`2S3Row?#GhzfuJ2!H?xfB*=900@8p2!H?xfB*rZ z82?}0#}~B&0T2KI5C8!X009sH0T2KI5C8!|0ONn02Ot0fAOHd&00JNY0w4eaAOHd& zaP-2+S-Fb~2-&1$@)kbWq@_V5ipAOHd&00JNY z0w4eaAOHd&00JOzWf7Q3hB}$i)2*42HiH@3vZWD?F;mkrh0=6k_xADJuKd1yDetU; zCw>X<1npPk(^upv>d#M@=H%2wxlk=>*yI1B z0d|HR{giV<^+5mxKmY_l00ck)1V8`;KmY_l;7TRX8f}Y2f~G0vFo+omAtr>l5DwlY z<^2YqyRru0 z?EL?e{6_%zfB*=900@8p2!H?xfB*=900@8p2wW)y-1+~v^jaTt!0aC&^Z%)=vHut7 z2p$b+W9<2sQ!U?US=D^7c~^5&(+^@_Qh%V{r2M^dbM%MN^^u=N?hYOe{&}!Fa3C2lNM{8Twt zD3Qu`&TYIZr|g82;D(e{_6;dsxRF_;ByU+33|P)g?-?(a3%UH>@`TOXt$5CK!-)s( z?@*J=mIe2Jp`717PVl&C7CF9dS^LEe!yfo zg`aF5Z|wsnyKDKgy*N?6Z?dW?)55M}&#g9brba%iDiufFj5`YX^4_T`E2E=h)BIwQ z9pgt`s)1E(Y47gsg(*2P@o9SR3srIn4$G1FV zvRzGnp(i-gY@f2$Wx`*d!Vj{2&N|h$uODkihv&K>JG?o>@>cFj&-|`<*t3N@!p}aL1aYG{W=v{x9|7m=CK&X2tXHlF z*{-7-`&!iG5U+JdjapmjB=p)sY+9|OuT`lKn=rH9So^-%RCNL<*mZ* z*E>D%TTN=Rrzg0-+v;eDJrZnoOxs-z*^ydF4yA8na*6vIw+XkrmYQV;h}>>khrdP# zfsTcLeQ=vk8C_+M@3~-%#FeC(2-;tL?2A!+3C%g(fKZw|XL)rRaZ(G_?K|#{DanCE z@N3p3MKV-eDUJy-UaMR;nOn5086&E{%T8l1ijLJPnsfxV%atBrT;bn}6P0hp#Ak9f zi3UULQEQbhrFFfRb{IRN{2M>FW`E=Nw`N097GTS6&h7dTOS~PwGomE3iQql*fv_xA zsuQhcVm6gqky*(ohxzvdUWIPC?5ajjQI*=Y<=Zyy3aZIOBDg736%V(R@}e32A6?C4Il1bNJ%69QCz~*N2qk=M%LCiQ*i~nPMB}BlwuO zpdQN8|r8_O9~H$R9=4 zh5v8(;o$XwM+J$dZ~y9e%(Seg58tQD6O&@;h1q9^Rkz^W6n&oCcCPL`L#)avZr-`6 zQY(FL;qAQ9evZD_dAw0^$IY1W+|F}#+|>IN-PCiR6MfPUTdpG*G>RP2sX;YgpYpJUFa~WMevzE55upgLcR%8u*`%x}kjNX&imT%0>;wthxQW;%k?lF!y|hs(0LiWF(Qg$l)(yoS#Z;w~F!pZHnlJ2f+<~HbNk!`wrjPb>DR?=xm zU7No)w@uOzyLnP8EA8=2q)ScS#%p=K)#SKIoLUk=v?OkH$=qh$gjRbl*H_wY*w6pZ z2iQmK{I6~_kCH(E1V8`;KmY_l00ck)1V8`;KmY_T0f9(Z3x>jBEspX3C7=(JAOHd& z00JNY0w4eaAOHd&00JQJ&qF{O|DO`i|4;q%n1&S)009sH0T2KI5C8!X009sH0T2Lz zUo8PGyfiq(fBuus_GPoy^MCRC|E|FL0J~HFuX?jK)^f0=vw5QFZj$I?g}W2VV6h0=6k_xADJ zuKd1yDetU7C|*9`1cx_{+_rUOi@j7qZ^rB$RAS0`)2#e7ga7QmSlYSD8x_B6s#HXY z#k$gcX1`q!UERP|Znu{~7#z$DuF1@-NH);VXrL}XqpQ=I1^e0AKtIEe*X3vAk#u^& zekK~|XXrp(eug7ybHRQtYMh_y$N#+^Kh=L4NcS$-&yEKAdEvRb{QUSvb8x|awl~nv 
z50iEId0sKsEZEPs2KxCyM_qn?a7L`XvcPt!wSj(~9k0vJ_g0ww3-*&W(9hFH>+tcZXVlFmz&4VEx0pUt~AiiSD&uS&DZW(a9^}M+CVpte7!C=|M8Xu zcSXx14RrIBhw5_k$h8aZiI#^O=;q6t>T>gN>w-I?<)H?;dFYY4+&pxC!Tr$kU<2KJ z@h5e;`I5TOZb;1k@6wCn{r^YV&)7lsUG^AzkiP|R7yB~1nPu5>*1n*s7J1ixWdbu6!max*cA>vqNs@V5(*mu}B*?#uhY!552JK1e)Ex!|BuxnTwi|W7B-_@~Jpx-$T4Fo^{1V8`;KmY_l z00ck)1VG@@5oiu~R~EeMBH<(ncam^|gcp%;2MM>6a2pA?k}xA-orEl9yhnm9Ol{Epa@xOLBz)rFk*U)KF)&0lWumz(|NCVx5ZFUS04)n8Wp<*2_L z@t4E?awritv{--h-|5KTa|EWyI|5PU9e=3vlKb6V& zpUPzXPh~Rxr!pD;Q<;qasZ7TIR3_tpDwFX)mF@ZeGXZv%9p|(E|C&9;=l(y;X86qi zJlo9Y{hMqVpY6Q7sJ2m&Ag0w4eaAOHd&00JNY0wD0| zB*4NwmHGMNo$y^!JSoLHrFcS$FOuRNQoLP?w@L99%qlTZ}8s;?EiG0tf(~zfB*=900@8p2!H?xfB*=900>+pfk;>j zc8N_=Y&yjzAvTM|rbBGn#imVcTE&KmjV?Bt*tCdEv)D9=OD zY^?SFF#rD|j~lXs00@8p2!H?xfB*=900@AVnnauyE zGMWERWitPt%4GgOmC5{nDwFyDR3`KPsZ8eoQ<=>Fr!txUPh~RypUPzZKb6V+e=3vt z|5PUP|EWyo|5KUF|EDsU|4(Hy|DVca{y&xN`TtJ^*!%4NutV%``P2UIvH!?_0r2nG zUbd6J`~OyU6I;!H1JJ?W{C{3Qp&!wo<8K6f>QbNHunz(t00JNY0w4eaAOHd&00JOz zl@rjyOM^p0>qgSqzHGM36H9txot{|26I-eF*6N8dPfYj3G*7I>6KnRw znmn<%Cl>R>R8LIt#G;;9#1jj9VxgAs()cXFC_i0-?1DIHKFcm>GcAW(%;x<~$D0P@ z564c#)~b&xrMD8a5UY=9W}j<@-;2f zz5b4NHqy}x&(-0mAev1}j(&WjIq2_bqLGe%n5@&$zBQ7g=M{5}zoUy9>F5U?bvT;I z^k>sD(GSj;1OAS7G}6(t<8?S{T}=HF(f3xE{r--&H`3A5N9%O7PrjJWe8tTAJKEMr zM^Bxq*HQV@od&rWGx+qwJIS_3E3v1NV0BbM*C@ z!GVEZndqC(_;*Yznm?hRX`k;T>g(UC!_RbIrq`5D(KjCR@0V6IeL_F|zB{SMcGTf# ze?RZ)d!;k<*g5}hX+^w|e!lv29e#>#sZZ{fUc1M?S6UHkq@PE=Uaz0hcTv7>w|}R! zLT#jh4oS9xTnC`clOzzyZX@BQ}jd1SZU4BP)F6Vj&`g_-`v5v2zKCPLOb9@MIbZLCs z#fS53RDV`CF#f-o8x5i z&{d58XE6Ss_fo?6f4*l?HO4RG(G(rISobO9aW%&O)?>Qf48P@|s2Kla{694@_b3+Q ze{tue;p2btxN$&?|HFDTz`m^iXZ{~PAOHd&00JNY0w4eaAOHd&00LJtftl;NT&ig&7u5ph{+;dzb-6M;&tV{b(KDlnmCv?C9v z_Md%n-Qq?%8h*S^NBg8%ak0oFY2WE5;`jgEdVheOVMp1|*g^iD|Hs&a>^`=OeVN_N zvTQm3<$s8;{&!4&UjLE)J^kDI@9FpJMSYuo3(tlR2!H?xfB*=900@8p2!H?xfWTEv zpf%bSi3Ckk;bVP;kJh_gAsSHlsCbDhgdbP<=yS0vgbs)wWOvO9eA=|afB2kqg$vIq z>c=;By26J^MLn-1T;YQbMg8E+B3C#&uBh*==x~M8M-}zVSK3|S)G0+h{a~9by!)i0 zo+`Dv!rR9c^_^|Z6;57I)VB+|E4=lrqMq2Jxx$;hiu&d=Ew1qTw-oh_$C_Q?*bYTK zcCN`4UVU0oU%Myn3P--K@E--myuy*6hF#&6hs2KyR9OhQ!poZ!^>C}=3Wpw1)I;Z^ zuJH2{iu#foVdBWe`H|!E3B->A?D7A>06WJ{uvgg&e8s=NS6e(n2cZX)40 z3CBoSC1Hhxqa++5;V=n@n!??2hnqG2*ZwfTPO=x-U$H-Bzt8SrUuElAnsu`XAL;*s zzwQ6e^oRK~|51H|p4C62tJ)duu=W$}Kk;n%fB*=900@8p2!H?xfB*=900>+J0e9)X zTdY@__gcTy?)I0L_{)p^WwQPsEt{;F-itp7)4vi={H$@+g(ChPxEnXLauWwQPsmC5>lR3_{HQJJj&M`g19AC<}ae^e&x z|52H&|3_uA{vVZR$NwkT&-vPaf5QGf+sAV3OUz_TSXh6DKj;6Een9^N{XTuWeyiT6 zU!yD9Y3*f<|1aVKM^+F30T2KI5C8!X009sH0T6HqxI=g6C9c-H^U1i5%4A$e<*G8Q z_brlp2=x81DwBH% zR3`Tjs7&r5P?_9Apfb6KKxJ|dfy(3_0+q=<1S*qz2vjEb5U5P-8_I>up><`%Qut`1(;CA*Kte2%&D+}mn_!|KJLI1J-ef^L5-GJZG_vm?j zGcH*W009sH0T2KI5C8!X009sHfln;~7VfFc$>;03r^8Y_q=$PdbI-+o?{2eOit}gC zv^aCI6z`JaNh#hb#S>C|kreNc;_XtrO^UZlaVEueDXvNJ7Af8=#havfT#Cn}xGKdJ zDIS&L_V{1S|Ht$HPwgd(ih}?MfB*=900@8p2!H?xTsi{ov-@FdsYH8?`B0TSFvyos zR1;w{P8XfD7k!r!_YPQ#=#e=DR?)roil5|pv@5R}B(nuI8x*a*wpG~L9 zqkCIU_howf`^YSHSI(x%V`^76v*dw!MNSW9Oft8?maUiZuOScPiQG@7GY~mLp6C-f zO(rN1*(49lUAb?NJd7vu8Zw=M$o=GrK9T##Oavll$b1VTr^$2%BAaCLgDYnS$?Oaw z_mdeEM9wnn`9Hfqz&>K<*?a6=_7;1cy~Yl+m)P^{@A%sQf6RXs@E7bUzAnH4_5^#J zJ<7hp-x08%J;44g&xQ{OfB*=900@8p2!H?xfB*=900@A+$`*6xY5d19@e81uw*PfYW~T0F64Ppruki+f@*PfYd16i+Pb ziAm#sjUS018w>D%_<#TifB*=900@8p2!H?xfB*=900>+Jftkg@PG_VL`V{JwlC?<~p3UyEQmLHlf;Ix$u( z?Of%JPM0Sp#h!`M>}?aCt2^Ci_S@yj@Ba_7GyMI37g0fE1pyEM0T2KI5C8!X009sH z0T2KI5V))euxMK(5;sk8A3@v|5cB_o>|B8V!v_RF00ck)1V8`;KmY_l00ck)1VG?w zBG44>uB^cz=Kr@Vg#deyt>(Y$pVA}Rf6+F#eAM(vQ#k%md|B)}u_fwv)vWTrcp`j2 z00ck)1VEr30$&|al9^1fSk7-BFXYDZ<@{)VbXOs_yHL)Hn2&VR&>h2Tw+yGYto`DK 
z;gnB;)XIH@sp;ZGDYbs%7Gu0$P2RLDIHMLzV}*O&8E`o(o*NwaOb;@59n zH@q$7>fZFJO&iI+)JmrqV{cYX4smN8{?;nf@u532dO>loS+Ebqu{k^_jv3?ToX~d`N(B$}fw?U^>1CUaY)tj4$+f(;%)ho)GB54y z0_=$#z81v}W2R-bn%vBdX8es#7xsvBx#@C#s+=p7$YsGCS{SJ+oLH`r zuX626UhOF4%X_D)tT^qS>=-{3FHFgaiBHpeCnqQPsWe{LR~VPmS+%V} z)Y@0g(rqH;!YX@w%QHqkrzFipaKbw6Z6c!gsPq;>NqbK4>NMhn7OLAf+b{M=t<<{)3+TWTD>Ba@vvYT&Fis$0z4fn`Oe^ky%B(i+2mVwHN zHhXj(!MO8!C7DeG?~#wLwZFpXhB=G1+81xZLNdzNsmWo^rCTn$sudNkuc~FEDz$qg z{90T4`TxlPJ6X>cI-~^w5C8!X009sH0T2KI5C8!X009uVJPEk#L{|U)ALIYa^O(Ua z5C8!X009sH0T2KI5C8!X009uFmw-F|r|%QApZ}i?u(S1+hx8x-0w4eaAOHd&00JNY z0w4eaAOHfFEdf2;Q~B+`SZKf=|9=!Q{!!Zy50T2KI5C8!X009sH0T2KI5CDOT zAP^2~ag6^j0wA)200@8p2!H?xfB*=900@8p2!Oy9K)@aUzaL=lUjYuoMGyc15C8!X z009sH0T2KI5C8!X0D*-PXbCTk4-KswNoV`AS+V|KkX;DyfB1j^2!H?xfB*=900@8p U2!H?xfB* z_92U9p(eMr)14)S8zU=+XF7AhwKic%FQWe)u6eYzDRw~R!EB#)Igx3{=LD06<}@2k z7?l#?5U$WPW&FfytuxDU$ za_de>G{HiTQpmSbx>BK(*Ej#Zw{PFSd;8(_%TIrOeY^32-RJ%PpQ~3ZpPBTZ1U|S2 zYa(sr9d$L4+yKX?DLB#6YPkN_k9d3QESj7AWGNq+c1$2)q#K*5Vc-`3c%tHQ)`csp zz>*5C!s)_B#^xQuZ5%!TK+qEQ4zpe|%J25`yP zuXPG&dA5zalb9)s4+zhOqS*tTx20!=f`Cd-|61WtVO^+RR@+TqzvK&?JD>((%BWq0 zTx5y&>a?7KE9MzkKNbGZe@l@}Ce1DN7*pT$nF3C&5YdwYliL~jEnw)J5TmFu$i-@p zImp7#6iaP+@D58Mw`!W@$eClfBfvcg6Nm9l2BM1SC>;b+E`A(J$Jn}s z%McsY!SQQiJjQikRp2%aa3ek?w9+;%1$%C6^ZCdq7H#s2a`TDTzC z$|;zNK@zuwjRP4Yl%F_YG{p47TT!MR!JD7f#$LYwld+C8(Dzsy4b6)ODTOhN+J_6G z?KURhAYRpt*m)<%s41QQlps>57w0>9%A-eAdas_UDx@owNmM}){i5pX*{6EY)ehrZ zKe6MD0F9^slG4}niNejIK)kWdLmQ3lE!5YRP69*8g49nE2!#jKAi+-~q@UWIP!K5k zg=JYO%0xtyiblgi>sc>_dLEZUX(65&Lx{=f7eJflCCZUPY86lLGE=e&&5|XpSt#Ws z^SzYL53=JOXn+ax{*05zjfp>s>*{#PuU0Cq7ZR4L3J0msR=7@vY7bwvqst`6PwEk> z(dpA&B~8jDULS#HWB_xNqW5PTrzYyokAc?n1i>>SG-?1!qG_$|EaA!iOwek0x`~~< zXQJw4Zj4*Uz>7Um2Xv0P^RsO^#m~paxO=>}gLPhJ z&tAOTAC1%&^Exg&!(ht#>xGBN8kCPbl~OvKj#I5|NAd1>8J5p~Wh7SCBU$c=R?us( zA}Q`!1_|rs+|)tTeX3#^*Q@P^3W-qxkvg;dNf6sFmFtW)5<;%1Y>thBCmXK&a{wz8 zHKn1_R%THmQTI&APPVOj6lFi#>@YITQj&%mTlLv;gcYX8HPa2352`vHLMEg4q=$hs bZmS#zi!63m%BFG3wHjm(CEd*#Izz6fy4EW{PyE%ZX z(WtB+`S^a3|NQ3n*V|t|zJGgrqiNUQO+S5keE$67=U-lb`1r?nZ*SLSvZY!VNgiRG zZoc^(&XY)jRZiL_oXqg1RQI%N+S0fi-=@2B(W$qrUgR31?Y>haU!3Z+yWHXXTq1|d zj$*fKJ!^2A$!p~qvsjAQtw=kqV)4^=50!LJ-8?!29e2Rd;|7$c7A_@qJG-t8Sj*%t zQB$=#oM;plk>iKDWa_N807xd&cz$Sh)aI{|$A@Gfaqm%=BtR`Ds9 zN?WGTq+UBSNJ(jrDe3Gs>*m~PMn7}+*%MtUuH9^RuY~iZy0a7wZQ5sLR{SJZC|BK^ zx1_MT>-M{dKgg>!VVr?mkX&NKD`r?v%XyC>tER*Ucna6MQDC*SXtosHdMQDi2q&?l zB7Hl%9q7TD(+!V7>@BzZ4u>39*;?VRzRo?9 z-9%wY5-wNEwC+yT@|m_prYGj3$LigA!=J0wb2F4y?rZ0iS<12;MU%8Vb4g|Cy0@1B zZoNinqTwqdxcJ;Qjnb-BwhE31i^8(gyVd3pS$0*0oo6#C(CI)YOKKbyZnz+3&HLV4 zAMX(0*)H)|v2=!Q2&_gkFh+HjLU)tciTaGOkRaHtZM_4VUnaBnsLGLG@fR3E5i~sMUF)p?!ohLE>{nBQQGa=n06hdb{!AS~4Jj`nBau5OOg9^JW z*=Gq{2##<8(ZN3~r_Ew^Sm5j_wXZ0rsyMAtO$0@;q?nE*M|j5t2ja;(ZAM zaAeK4;c+|7CjpDMVojL`HdMGk)55{kpZ_Iao}btF!)L6!tSP#>5ZN|*ee-n@e|mp^ z|MdFykFAnGgeLj0*@M~kQ?Qa42@_u3DhCj)+d@66$)GUJv+K+BFuaE@t6xI z9dc-*b`e%87aa*u={dJ~tPfXMv%vYQYn@beNQVcm8>LHb16sPnA6@wZMQ9dDV|MZZ zYNgWgg(jYcT6R3pa(FACiBOSYRtUC2g-C{)2mX!H+l7Q286uR?2RK^^NQh-oxc2(v z*W~>4{`30}ukSwo^)1LLhz6v&feh&A89s~SBvlX0i-x^Z21I)NPq3AvZ94RRAK%Q}QrZi~Pb7Xw*igyfT?BqN!lX{oib}EAcDMe#k>moZ(bwE}OzD<|$b#`8E z>rAd_Pu-Lxmh@OhQ9Y!HF7>^5m2Kgd<&9cR2jv~&hi=?MOz??$xGK5@c15MZs$d&A zfstmm-X|qaC{aTh0A7N<2r3spQ{!H9HyF!~$+~@AQ%k}fLyu?fF?LV9*1m6A)CUp@ zVh1Xyx~jf4GMxjw+fnZBhyj)+yNPFiAKQM*ea*73W{$`!)-(g?h*}CtZG14_y9Qe ge*NjIZ2$1%&+ngpdHwn0x2O32PuYI`<=4Oc2he0q4*&oF literal 0 HcmV?d00001 From 3f78d05b26642d0966520815d9c2b02367610c96 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 21:34:55 -0600 Subject: [PATCH 22/35] quick update --- 
...se.ensembl.81.partial.ENSMUSG00000017167.db | Bin 249856 -> 0 bytes ...mbl.81.partial.ENSMUSG00000017167.fa.pickle | Bin 3736 -> 0 bytes ...bl.81.partial.ENSMUSG00000017167.pep.pickle | Bin 2850 -> 0 bytes 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.db delete mode 100644 tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.fa.pickle delete mode 100644 tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.pep.pickle diff --git a/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.db b/tests/data/mouse.ensembl.81.partial.ENSMUSG00000017167.db deleted file mode 100644 index e2eba44f73986fcae19abbdd0794bdc1a8aaee76..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 249856 zcmeI5eRv$jo#*GZdqz@s>BXawttB2K8{9F$>KTn>U6#u_vLIfJjgkGtJaCzerLjGG z@<^7(<|Wy?)gJ$teQsIECJ*}%?l!Rdc%QxGF5L5E@Ak3|**pigFCi>}i!olX{lFdA zfL{RP%VXH*s=9mBzpm+#GJQYLl)qPQj{Y#ZKJv53-N8SPTp#QX9FPi}>Qs|!mj!27u{2h=w>*_E zO^;3$C(Ai2p4(X{6>_Eg?t;4~IdsSH+AYJW^&8g>Z%etloFlbqBiWl;Ia`!5(~?k= zo4L`9ztQQ!9$_*!UCvLHbA{5FYov2-<5f9jC!7Q~q^z=UNb$mr2k!4slgpL`_kW?B z-#$)i>Y82TnBB7WiyMYLwQ|$s_7$e4ixZ{Pmf>w%QX4n%zpWcKa7z<=r$!61O7e-i z%DZP_$vX=7PLyg{B4rlG?2KDCuHU?MICc9S>u+0o$DOHLhwr4G3BLR|dc3vS%VsTq zwihSL_f1y$IxXxv_S|X{XKLiLs#0;(&A6kGFYleIvNAe4Hl3?Z%@3ed1FP85-rd^^ zQ*tecPt$uRCnxyXHD1_P7?;yowXJbDt$o#gOioRd3&oO;hm}3P)8Ae#BwZ4JRhs)#Mj?f-}u_F6%NGofzY1hMmn}WnVwmjt-Cf3H>)e%fP9tKsZ63Ro6Hl(J zqZ>0ZrY1M>+ODcmTlY{{Z6h(=I*(~7XH~tW{l-j4SChALS9<1m#lxO0+!1~<%Or>! z#W7=2oBIejmjuC}&ttuE5y^HP-PqTnCWm;fJ8IP0N+;S)VpCg)O{=Z-wJP=)Gp)^P za)cWmsA0IOgCP#I%;P{+-YV?Az0+?ssmY$6;Qnr_cOv#ku-P$f_fKR;Y9%?8zAext z?x5Uu-||{&%N-!T?Q@M@1-%jf&fzwnGP=qh-*dqji7QDn5wv?B`(hMd3v*64Ae1K0 zSzet+oYX>f`;NO~N^&3({F-%1kqi}Aieo~I*DBXd<`(U0#)#_gveTH0qGPp+CLMw8 za-|0tSNON$MCDsC@tIsrqQMY*)LNxWXG_B(sU&J+c|AewFG(Ynhl$V%I6&OxGhJ0E#vheCHeV8 zt?%jL9Lt$}%SYGUEBIz{hsL)-_GEzn!v_RF00ck)1V8`;KmY_l00ck)1VG@@5x6VZ zsTo?grbXHerfCT+qG`<3v_fgRuzUM>ZdZO^zLXy-l}q_aQ&A7zWtx@S2mGO0v9xoQ z822l&mUaEZ8%J*2I&w>|^)Uy`{(&Z5uOK@a;Q#Oe0T2KI5C8!X009sH0T2KI5C8!X zxZ(*U!e+eI`2S3Row?#GhzfuJ2!H?xfB*=900@8p2!H?xfB*rZ z82?}0#}~B&0T2KI5C8!X009sH0T2KI5C8!|0ONn02Ot0fAOHd&00JNY0w4eaAOHd& zaP-2+S-Fb~2-&1$@)kbWq@_V5ipAOHd&00JNY z0w4eaAOHd&00JOzWf7Q3hB}$i)2*42HiH@3vZWD?F;mkrh0=6k_xADJuKd1yDetU; zCw>X<1npPk(^upv>d#M@=H%2wxlk=>*yI1B z0d|HR{giV<^+5mxKmY_l00ck)1V8`;KmY_l;7TRX8f}Y2f~G0vFo+omAtr>l5DwlY z<^2YqyRru0 z?EL?e{6_%zfB*=900@8p2!H?xfB*=900@8p2wW)y-1+~v^jaTt!0aC&^Z%)=vHut7 z2p$b+W9<2sQ!U?US=D^7c~^5&(+^@_Qh%V{r2M^dbM%MN^^u=N?hYOe{&}!Fa3C2lNM{8Twt zD3Qu`&TYIZr|g82;D(e{_6;dsxRF_;ByU+33|P)g?-?(a3%UH>@`TOXt$5CK!-)s( z?@*J=mIe2Jp`717PVl&C7CF9dS^LEe!yfo zg`aF5Z|wsnyKDKgy*N?6Z?dW?)55M}&#g9brba%iDiufFj5`YX^4_T`E2E=h)BIwQ z9pgt`s)1E(Y47gsg(*2P@o9SR3srIn4$G1FV zvRzGnp(i-gY@f2$Wx`*d!Vj{2&N|h$uODkihv&K>JG?o>@>cFj&-|`<*t3N@!p}aL1aYG{W=v{x9|7m=CK&X2tXHlF z*{-7-`&!iG5U+JdjapmjB=p)sY+9|OuT`lKn=rH9So^-%RCNL<*mZ* z*E>D%TTN=Rrzg0-+v;eDJrZnoOxs-z*^ydF4yA8na*6vIw+XkrmYQV;h}>>khrdP# zfsTcLeQ=vk8C_+M@3~-%#FeC(2-;tL?2A!+3C%g(fKZw|XL)rRaZ(G_?K|#{DanCE z@N3p3MKV-eDUJy-UaMR;nOn5086&E{%T8l1ijLJPnsfxV%atBrT;bn}6P0hp#Ak9f zi3UULQEQbhrFFfRb{IRN{2M>FW`E=Nw`N097GTS6&h7dTOS~PwGomE3iQql*fv_xA zsuQhcVm6gqky*(ohxzvdUWIPC?5ajjQI*=Y<=Zyy3aZIOBDg736%V(R@}e32A6?C4Il1bNJ%69QCz~*N2qk=M%LCiQ*i~nPMB}BlwuO zpdQN8|r8_O9~H$R9=4 zh5v8(;o$XwM+J$dZ~y9e%(Seg58tQD6O&@;h1q9^Rkz^W6n&oCcCPL`L#)avZr-`6 zQY(FL;qAQ9evZD_dAw0^$IY1W+|F}#+|>IN-PCiR6MfPUTdpG*G>RP2sX;YgpYpJUFa~WMevzE55upgLcR%8u*`%x}kjNX&imT%0>;wthxQW;%k?lF!y|hs(0LiWF(Qg$l)(yoS#Z;w~F!pZHnlJ2f+<~HbNk!`wrjPb>DR?=xm 
zU7No)w@uOzyLnP8EA8=2q)ScS#%p=K)#SKIoLUk=v?OkH$=qh$gjRbl*H_wY*w6pZ z2iQmK{I6~_kCH(E1V8`;KmY_l00ck)1V8`;KmY_T0f9(Z3x>jBEspX3C7=(JAOHd& z00JNY0w4eaAOHd&00JQJ&qF{O|DO`i|4;q%n1&S)009sH0T2KI5C8!X009sH0T2Lz zUo8PGyfiq(fBuus_GPoy^MCRC|E|FL0J~HFuX?jK)^f0=vw5QFZj$I?g}W2VV6h0=6k_xADJ zuKd1yDetU7C|*9`1cx_{+_rUOi@j7qZ^rB$RAS0`)2#e7ga7QmSlYSD8x_B6s#HXY z#k$gcX1`q!UERP|Znu{~7#z$DuF1@-NH);VXrL}XqpQ=I1^e0AKtIEe*X3vAk#u^& zekK~|XXrp(eug7ybHRQtYMh_y$N#+^Kh=L4NcS$-&yEKAdEvRb{QUSvb8x|awl~nv z50iEId0sKsEZEPs2KxCyM_qn?a7L`XvcPt!wSj(~9k0vJ_g0ww3-*&W(9hFH>+tcZXVlFmz&4VEx0pUt~AiiSD&uS&DZW(a9^}M+CVpte7!C=|M8Xu zcSXx14RrIBhw5_k$h8aZiI#^O=;q6t>T>gN>w-I?<)H?;dFYY4+&pxC!Tr$kU<2KJ z@h5e;`I5TOZb;1k@6wCn{r^YV&)7lsUG^AzkiP|R7yB~1nPu5>*1n*s7J1ixWdbu6!max*cA>vqNs@V5(*mu}B*?#uhY!552JK1e)Ex!|BuxnTwi|W7B-_@~Jpx-$T4Fo^{1V8`;KmY_l z00ck)1VG@@5oiu~R~EeMBH<(ncam^|gcp%;2MM>6a2pA?k}xA-orEl9yhnm9Ol{Epa@xOLBz)rFk*U)KF)&0lWumz(|NCVx5ZFUS04)n8Wp<*2_L z@t4E?awritv{--h-|5KTa|EWyI|5PU9e=3vlKb6V& zpUPzXPh~Rxr!pD;Q<;qasZ7TIR3_tpDwFX)mF@ZeGXZv%9p|(E|C&9;=l(y;X86qi zJlo9Y{hMqVpY6Q7sJ2m&Ag0w4eaAOHd&00JNY0wD0| zB*4NwmHGMNo$y^!JSoLHrFcS$FOuRNQoLP?w@L99%qlTZ}8s;?EiG0tf(~zfB*=900@8p2!H?xfB*=900>+pfk;>j zc8N_=Y&yjzAvTM|rbBGn#imVcTE&KmjV?Bt*tCdEv)D9=OD zY^?SFF#rD|j~lXs00@8p2!H?xfB*=900@AVnnauyE zGMWERWitPt%4GgOmC5{nDwFyDR3`KPsZ8eoQ<=>Fr!txUPh~RypUPzZKb6V+e=3vt z|5PUP|EWyo|5KUF|EDsU|4(Hy|DVca{y&xN`TtJ^*!%4NutV%``P2UIvH!?_0r2nG zUbd6J`~OyU6I;!H1JJ?W{C{3Qp&!wo<8K6f>QbNHunz(t00JNY0w4eaAOHd&00JOz zl@rjyOM^p0>qgSqzHGM36H9txot{|26I-eF*6N8dPfYj3G*7I>6KnRw znmn<%Cl>R>R8LIt#G;;9#1jj9VxgAs()cXFC_i0-?1DIHKFcm>GcAW(%;x<~$D0P@ z564c#)~b&xrMD8a5UY=9W}j<@-;2f zz5b4NHqy}x&(-0mAev1}j(&WjIq2_bqLGe%n5@&$zBQ7g=M{5}zoUy9>F5U?bvT;I z^k>sD(GSj;1OAS7G}6(t<8?S{T}=HF(f3xE{r--&H`3A5N9%O7PrjJWe8tTAJKEMr zM^Bxq*HQV@od&rWGx+qwJIS_3E3v1NV0BbM*C@ z!GVEZndqC(_;*Yznm?hRX`k;T>g(UC!_RbIrq`5D(KjCR@0V6IeL_F|zB{SMcGTf# ze?RZ)d!;k<*g5}hX+^w|e!lv29e#>#sZZ{fUc1M?S6UHkq@PE=Uaz0hcTv7>w|}R! 
zLT#jh4oS9xTnC`clOzzyZX@BQ}jd1SZU4BP)F6Vj&`g_-`v5v2zKCPLOb9@MIbZLCs z#fS53RDV`CF#f-o8x5i z&{d58XE6Ss_fo?6f4*l?HO4RG(G(rISobO9aW%&O)?>Qf48P@|s2Kla{694@_b3+Q ze{tue;p2btxN$&?|HFDTz`m^iXZ{~PAOHd&00JNY0w4eaAOHd&00LJtftl;NT&ig&7u5ph{+;dzb-6M;&tV{b(KDlnmCv?C9v z_Md%n-Qq?%8h*S^NBg8%ak0oFY2WE5;`jgEdVheOVMp1|*g^iD|Hs&a>^`=OeVN_N zvTQm3<$s8;{&!4&UjLE)J^kDI@9FpJMSYuo3(tlR2!H?xfB*=900@8p2!H?xfWTEv zpf%bSi3Ckk;bVP;kJh_gAsSHlsCbDhgdbP<=yS0vgbs)wWOvO9eA=|afB2kqg$vIq z>c=;By26J^MLn-1T;YQbMg8E+B3C#&uBh*==x~M8M-}zVSK3|S)G0+h{a~9by!)i0 zo+`Dv!rR9c^_^|Z6;57I)VB+|E4=lrqMq2Jxx$;hiu&d=Ew1qTw-oh_$C_Q?*bYTK zcCN`4UVU0oU%Myn3P--K@E--myuy*6hF#&6hs2KyR9OhQ!poZ!^>C}=3Wpw1)I;Z^ zuJH2{iu#foVdBWe`H|!E3B->A?D7A>06WJ{uvgg&e8s=NS6e(n2cZX)40 z3CBoSC1Hhxqa++5;V=n@n!??2hnqG2*ZwfTPO=x-U$H-Bzt8SrUuElAnsu`XAL;*s zzwQ6e^oRK~|51H|p4C62tJ)duu=W$}Kk;n%fB*=900@8p2!H?xfB*=900>+J0e9)X zTdY@__gcTy?)I0L_{)p^WwQPsEt{;F-itp7)4vi={H$@+g(ChPxEnXLauWwQPsmC5>lR3_{HQJJj&M`g19AC<}ae^e&x z|52H&|3_uA{vVZR$NwkT&-vPaf5QGf+sAV3OUz_TSXh6DKj;6Een9^N{XTuWeyiT6 zU!yD9Y3*f<|1aVKM^+F30T2KI5C8!X009sH0T6HqxI=g6C9c-H^U1i5%4A$e<*G8Q z_brlp2=x81DwBH% zR3`Tjs7&r5P?_9Apfb6KKxJ|dfy(3_0+q=<1S*qz2vjEb5U5P-8_I>up><`%Qut`1(;CA*Kte2%&D+}mn_!|KJLI1J-ef^L5-GJZG_vm?j zGcH*W009sH0T2KI5C8!X009sHfln;~7VfFc$>;03r^8Y_q=$PdbI-+o?{2eOit}gC zv^aCI6z`JaNh#hb#S>C|kreNc;_XtrO^UZlaVEueDXvNJ7Af8=#havfT#Cn}xGKdJ zDIS&L_V{1S|Ht$HPwgd(ih}?MfB*=900@8p2!H?xTsi{ov-@FdsYH8?`B0TSFvyos zR1;w{P8XfD7k!r!_YPQ#=#e=DR?)roil5|pv@5R}B(nuI8x*a*wpG~L9 zqkCIU_howf`^YSHSI(x%V`^76v*dw!MNSW9Oft8?maUiZuOScPiQG@7GY~mLp6C-f zO(rN1*(49lUAb?NJd7vu8Zw=M$o=GrK9T##Oavll$b1VTr^$2%BAaCLgDYnS$?Oaw z_mdeEM9wnn`9Hfqz&>K<*?a6=_7;1cy~Yl+m)P^{@A%sQf6RXs@E7bUzAnH4_5^#J zJ<7hp-x08%J;44g&xQ{OfB*=900@8p2!H?xfB*=900@A+$`*6xY5d19@e81uw*PfYW~T0F64Ppruki+f@*PfYd16i+Pb ziAm#sjUS018w>D%_<#TifB*=900@8p2!H?xfB*=900>+Jftkg@PG_VL`V{JwlC?<~p3UyEQmLHlf;Ix$u( z?Of%JPM0Sp#h!`M>}?aCt2^Ci_S@yj@Ba_7GyMI37g0fE1pyEM0T2KI5C8!X009sH z0T2KI5V))euxMK(5;sk8A3@v|5cB_o>|B8V!v_RF00ck)1V8`;KmY_l00ck)1VG?w zBG44>uB^cz=Kr@Vg#deyt>(Y$pVA}Rf6+F#eAM(vQ#k%md|B)}u_fwv)vWTrcp`j2 z00ck)1VEr30$&|al9^1fSk7-BFXYDZ<@{)VbXOs_yHL)Hn2&VR&>h2Tw+yGYto`DK z;gnB;)XIH@sp;ZGDYbs%7Gu0$P2RLDIHMLzV}*O&8E`o(o*NwaOb;@59n zH@q$7>fZFJO&iI+)JmrqV{cYX4smN8{?;nf@u532dO>loS+Ebqu{k^_jv3?ToX~d`N(B$}fw?U^>1CUaY)tj4$+f(;%)ho)GB54y z0_=$#z81v}W2R-bn%vBdX8es#7xsvBx#@C#s+=p7$YsGCS{SJ+oLH`r zuX626UhOF4%X_D)tT^qS>=-{3FHFgaiBHpeCnqQPsWe{LR~VPmS+%V} z)Y@0g(rqH;!YX@w%QHqkrzFipaKbw6Z6c!gsPq;>NqbK4>NMhn7OLAf+b{M=t<<{)3+TWTD>Ba@vvYT&Fis$0z4fn`Oe^ky%B(i+2mVwHN zHhXj(!MO8!C7DeG?~#wLwZFpXhB=G1+81xZLNdzNsmWo^rCTn$sudNkuc~FEDz$qg z{90T4`TxlPJ6X>cI-~^w5C8!X009sH0T2KI5C8!X009uVJPEk#L{|U)ALIYa^O(Ua z5C8!X009sH0T2KI5C8!X009uFmw-F|r|%QApZ}i?u(S1+hx8x-0w4eaAOHd&00JNY z0w4eaAOHfFEdf2;Q~B+`SZKf=|9=!Q{!!Zy50T2KI5C8!X009sH0T2KI5CDOT zAP^2~ag6^j0wA)200@8p2!H?xfB*=900@8p2!Oy9K)@aUzaL=lUjYuoMGyc15C8!X z009sH0T2KI5C8!X0D*-PXbCTk4-KswNoV`AS+V|KkX;DyfB1j^2!H?xfB*=900@8p U2!H?xfB* z_92U9p(eMr)14)S8zU=+XF7AhwKic%FQWe)u6eYzDRw~R!EB#)Igx3{=LD06<}@2k z7?l#?5U$WPW&FfytuxDU$ za_de>G{HiTQpmSbx>BK(*Ej#Zw{PFSd;8(_%TIrOeY^32-RJ%PpQ~3ZpPBTZ1U|S2 zYa(sr9d$L4+yKX?DLB#6YPkN_k9d3QESj7AWGNq+c1$2)q#K*5Vc-`3c%tHQ)`csp zz>*5C!s)_B#^xQuZ5%!TK+qEQ4zpe|%J25`yP zuXPG&dA5zalb9)s4+zhOqS*tTx20!=f`Cd-|61WtVO^+RR@+TqzvK&?JD>((%BWq0 zTx5y&>a?7KE9MzkKNbGZe@l@}Ce1DN7*pT$nF3C&5YdwYliL~jEnw)J5TmFu$i-@p zImp7#6iaP+@D58Mw`!W@$eClfBfvcg6Nm9l2BM1SC>;b+E`A(J$Jn}s z%McsY!SQQiJjQikRp2%aa3ek?w9+;%1$%C6^ZCdq7H#s2a`TDTzC z$|;zNK@zuwjRP4Yl%F_YG{p47TT!MR!JD7f#$LYwld+C8(Dzsy4b6)ODTOhN+J_6G z?KURhAYRpt*m)<%s41QQlps>57w0>9%A-eAdas_UDx@owNmM}){i5pX*{6EY)ehrZ 
zKe6MD0F9^slG4}niNejIK)kWdLmQ3lE!5YRP69*8g49nE2!#jKAi+-~q@UWIP!K5k zg=JYO%0xtyiblgi>sc>_dLEZUX(65&Lx{=f7eJflCCZUPY86lLGE=e&&5|XpSt#Ws z^SzYL53=JOXn+ax{*05zjfp>s>*{#PuU0Cq7ZR4L3J0msR=7@vY7bwvqst`6PwEk> z(dpA&B~8jDULS#HWB_xNqW5PTrzYyokAc?n1i>>SG-?1!qG_$|EaA!iOwek0x`~~< zXQJw4Zj4*Uz>7Um2Xv0P^RsO^#m~paxO=>}gLPhJ z&tAOTAC1%&^Exg&!(ht#>xGBN8kCPbl~OvKj#I5|NAd1>8J5p~Wh7SCBU$c=R?us( zA}Q`!1_|rs+|)tTeX3#^*Q@P^3W-qxkvg;dNf6sFmFtW)5<;%1Y>thBCmXK&a{wz8 zHKn1_R%THmQTI&APPVOj6lFi#>@YITQj&%mTlLv;gcYX8HPa2352`vHLMEg4q=$hs bZmS#zi!63m%BFG3wHjm(CEd*#Izz6fy4EW{PyE%ZX z(WtB+`S^a3|NQ3n*V|t|zJGgrqiNUQO+S5keE$67=U-lb`1r?nZ*SLSvZY!VNgiRG zZoc^(&XY)jRZiL_oXqg1RQI%N+S0fi-=@2B(W$qrUgR31?Y>haU!3Z+yWHXXTq1|d zj$*fKJ!^2A$!p~qvsjAQtw=kqV)4^=50!LJ-8?!29e2Rd;|7$c7A_@qJG-t8Sj*%t zQB$=#oM;plk>iKDWa_N807xd&cz$Sh)aI{|$A@Gfaqm%=BtR`Ds9 zN?WGTq+UBSNJ(jrDe3Gs>*m~PMn7}+*%MtUuH9^RuY~iZy0a7wZQ5sLR{SJZC|BK^ zx1_MT>-M{dKgg>!VVr?mkX&NKD`r?v%XyC>tER*Ucna6MQDC*SXtosHdMQDi2q&?l zB7Hl%9q7TD(+!V7>@BzZ4u>39*;?VRzRo?9 z-9%wY5-wNEwC+yT@|m_prYGj3$LigA!=J0wb2F4y?rZ0iS<12;MU%8Vb4g|Cy0@1B zZoNinqTwqdxcJ;Qjnb-BwhE31i^8(gyVd3pS$0*0oo6#C(CI)YOKKbyZnz+3&HLV4 zAMX(0*)H)|v2=!Q2&_gkFh+HjLU)tciTaGOkRaHtZM_4VUnaBnsLGLG@fR3E5i~sMUF)p?!ohLE>{nBQQGa=n06hdb{!AS~4Jj`nBau5OOg9^JW z*=Gq{2##<8(ZN3~r_Ew^Sm5j_wXZ0rsyMAtO$0@;q?nE*M|j5t2ja;(ZAM zaAeK4;c+|7CjpDMVojL`HdMGk)55{kpZ_Iao}btF!)L6!tSP#>5ZN|*ee-n@e|mp^ z|MdFykFAnGgeLj0*@M~kQ?Qa42@_u3DhCj)+d@66$)GUJv+K+BFuaE@t6xI z9dc-*b`e%87aa*u={dJ~tPfXMv%vYQYn@beNQVcm8>LHb16sPnA6@wZMQ9dDV|MZZ zYNgWgg(jYcT6R3pa(FACiBOSYRtUC2g-C{)2mX!H+l7Q286uR?2RK^^NQh-oxc2(v z*W~>4{`30}ukSwo^)1LLhz6v&feh&A89s~SBvlX0i-x^Z21I)NPq3AvZ94RRAK%Q}QrZi~Pb7Xw*igyfT?BqN!lX{oib}EAcDMe#k>moZ(bwE}OzD<|$b#`8E z>rAd_Pu-Lxmh@OhQ9Y!HF7>^5m2Kgd<&9cR2jv~&hi=?MOz??$xGK5@c15MZs$d&A zfstmm-X|qaC{aTh0A7N<2r3spQ{!H9HyF!~$+~@AQ%k}fLyu?fF?LV9*1m6A)CUp@ zVh1Xyx~jf4GMxjw+fnZBhyj)+yNPFiAKQM*ea*73W{$`!)-(g?h*}CtZG14_y9Qe ge*NjIZ2$1%&+ngpdHwn0x2O32PuYI`<=4Oc2he0q4*&oF From 6d840b421d5bb6b266cb26acce19a7474a09404e Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 21:52:04 -0600 Subject: [PATCH 23/35] ensemblrelease suport --- pyensembl/config.py | 2 +- pyensembl/ensembl_release.py | 29 +++++++++++---------------- pyensembl/ensembl_release_versions.py | 2 ++ 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/pyensembl/config.py b/pyensembl/config.py index 3dfd54a..91ae5ed 100644 --- a/pyensembl/config.py +++ b/pyensembl/config.py @@ -157,7 +157,7 @@ "latin_name": "arabidopsis_thaliana", "synonyms": ["cress", "thale_cress"], "reference_assemblies": { - "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), + "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE), }, }, ] diff --git a/pyensembl/ensembl_release.py b/pyensembl/ensembl_release.py index 521e2d7..1ed0bcf 100644 --- a/pyensembl/ensembl_release.py +++ b/pyensembl/ensembl_release.py @@ -16,7 +16,6 @@ """ from weakref import WeakValueDictionary -from .config import MAX_ENSEMBL_RELEASE # ENSEMBL_FTP_SERVER, from .ensembl_release_versions import check_release_number from .ensembl_url_templates import make_fasta_url, make_gtf_url from .genome import Genome @@ -29,16 +28,6 @@ class EnsemblRelease(Genome): particular release of the Ensembl database. """ - @classmethod - def normalize_init_values(cls, release, species, server): - """ - Normalizes the arguments which uniquely specify an EnsemblRelease - genome. 
- """ - release = check_release_number(release) - species = check_species_object(species) - return (release, species, server) - # Using a WeakValueDictionary instead of an ordinary dict to prevent a # memory leak in cases where we test many different releases in sequence. # When all the references to a particular EnsemblRelease die then that @@ -48,8 +37,9 @@ def normalize_init_values(cls, release, species, server): @classmethod def cached( cls, - release=MAX_ENSEMBL_RELEASE, + release=None, species=human, + database=None, server=None, # server=ENSEMBL_FTP_SERVER, ): @@ -57,7 +47,10 @@ def cached( Construct EnsemblRelease if it's never been made before, otherwise return an old instance. """ - init_args_tuple = cls.normalize_init_values(release, species, server) + release = check_release_number(release, database) + species = check_species_object(species) + init_args_tuple = (release, species, database, server) + if init_args_tuple in cls._genome_cache: genome = cls._genome_cache[init_args_tuple] else: @@ -66,14 +59,16 @@ def cached( def __init__( self, - release=MAX_ENSEMBL_RELEASE, + release=None, species=human, + database=None, server=None, # server=EMBL_FTP_SERVER,, ): - self.release, self.species, self.server = self.normalize_init_values( - release=release, species=species, server=server - ) + self.release = check_release_number(release, database) + self.species = check_species_object(species) + self.database = database + self.server = server self.gtf_url = make_gtf_url( ensembl_release=self.release, diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py index 05d4a15..9a5bc46 100644 --- a/pyensembl/ensembl_release_versions.py +++ b/pyensembl/ensembl_release_versions.py @@ -22,6 +22,8 @@ def check_release_number(release, database=None): """ Check to make sure a release is in the valid range of Ensembl releases. """ + if release is None: + return MAX_ENSEMBL_RELEASE if database is None else MAX_ENSEMBLGENOME_RELEASE try: release = int(release) except ValueError: From 9a0cf7fb3ab0362046b1237cc07353d56102cbb1 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 21:54:44 -0600 Subject: [PATCH 24/35] ensemblrelease suport --- pyensembl/ensembl_release.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyensembl/ensembl_release.py b/pyensembl/ensembl_release.py index 1ed0bcf..0055d6a 100644 --- a/pyensembl/ensembl_release.py +++ b/pyensembl/ensembl_release.py @@ -47,8 +47,8 @@ def cached( Construct EnsemblRelease if it's never been made before, otherwise return an old instance. 
""" - release = check_release_number(release, database) species = check_species_object(species) + release = check_release_number(release, species.database) init_args_tuple = (release, species, database, server) if init_args_tuple in cls._genome_cache: @@ -65,8 +65,8 @@ def __init__( server=None, # server=EMBL_FTP_SERVER,, ): - self.release = check_release_number(release, database) self.species = check_species_object(species) + self.release = check_release_number(release, species.database) self.database = database self.server = server From f5c537dd1168b1d06db20d972a508a9e3568b360 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 21:58:53 -0600 Subject: [PATCH 25/35] ensemblrelease suport --- pyensembl/ensembl_url_templates.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/pyensembl/ensembl_url_templates.py b/pyensembl/ensembl_url_templates.py index 4fcf774..e00968b 100644 --- a/pyensembl/ensembl_url_templates.py +++ b/pyensembl/ensembl_url_templates.py @@ -38,7 +38,9 @@ DATABASE_FASTA_SUBDIR_TEMPLATE = ( "/pub/release-%(release)d/%(database)s/fasta/%(species)s/%(type)s/" ) -DATABASE_GTF_SUBDIR_TEMPLATE = "/pub/release-%(release)d/%(database)s/gtf/%(species)s/" +DATABASE_GTF_SUBDIR_TEMPLATE = ( + "/pub/release-%(release)d/%(database)s/gtf/%(species)s/" +) # GTF annotation file example: Homo_sapiens.GTCh38.gtf.gz GTF_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(release)d.gtf.gz" @@ -52,11 +54,15 @@ # ncRNA FASTA file for releases before (and including) Ensembl 75 # example: Homo_sapiens.NCBI36.54.ncrna.fa.gz -OLD_FASTA_FILENAME_TEMPLATE_NCRNA = "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" +OLD_FASTA_FILENAME_TEMPLATE_NCRNA = ( + "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" +) # cDNA & protein FASTA file for releases after Ensembl 75 # example: Homo_sapiens.GRCh37.cdna.all.fa.gz -NEW_FASTA_FILENAME_TEMPLATE = "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" +NEW_FASTA_FILENAME_TEMPLATE = ( + "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" +) # ncRNA FASTA file for releases after Ensembl 75 # example: Homo_sapiens.GRCh37.ncrna.fa.gz @@ -68,9 +74,11 @@ def normalize_release_properties(ensembl_release, species): Make sure a given release is valid, normalize it to be an integer, normalize the species name, and get its associated reference. 
""" - ensembl_release = check_release_number(ensembl_release) if not isinstance(species, Species): species = find_species_by_name(species) + ensembl_release = check_release_number( + ensembl_release, database=species.database + ) reference_name = species.which_reference(ensembl_release) return ensembl_release, species.latin_name, reference_name @@ -99,7 +107,9 @@ def make_gtf_url(ensembl_release, species, server=None, database=None): server = ENSEMBL_FTP_SERVER else: server = ENSEMBLGENOME_FTP_SERVER - ensembl_release, species, _ = normalize_release_properties(ensembl_release, species) + ensembl_release, species, _ = normalize_release_properties( + ensembl_release, species + ) if database is None: subdir = GTF_SUBDIR_TEMPLATE % { "release": ensembl_release, @@ -111,7 +121,9 @@ def make_gtf_url(ensembl_release, species, server=None, database=None): "database": database, "species": species, } - filename = make_gtf_filename(ensembl_release=ensembl_release, species=species) + filename = make_gtf_filename( + ensembl_release=ensembl_release, species=species + ) return server + subdir + filename @@ -172,7 +184,7 @@ def make_fasta_url( server = ENSEMBL_FTP_SERVER else: server = ENSEMBLGENOME_FTP_SERVER - ensembl_release, species, reference_name = normalize_release_properties( + ensembl_release, species, _ = normalize_release_properties( ensembl_release, species ) if database is None: From 9d1ce7c376f295bdeafe4dec3b173bad81232851 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 22:01:29 -0600 Subject: [PATCH 26/35] ensemblrelease suport, fix bu --- pyensembl/ensembl_release.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyensembl/ensembl_release.py b/pyensembl/ensembl_release.py index 0055d6a..497b503 100644 --- a/pyensembl/ensembl_release.py +++ b/pyensembl/ensembl_release.py @@ -66,7 +66,7 @@ def __init__( # server=EMBL_FTP_SERVER,, ): self.species = check_species_object(species) - self.release = check_release_number(release, species.database) + self.release = check_release_number(release, self.species.database) self.database = database self.server = server From a7e8b5b389537e10d6767a8be3259dd20fb24585 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 22:33:49 -0600 Subject: [PATCH 27/35] ensemblrelease suport, fix bug --- pyensembl/__init__.py | 27 ++++----- pyensembl/common.py | 14 ++--- pyensembl/config.py | 4 +- pyensembl/database.py | 85 ++++++++++++++++----------- pyensembl/ensembl_release_versions.py | 8 +-- pyensembl/fasta.py | 3 +- pyensembl/genome.py | 2 +- pyensembl/normalization.py | 3 +- pyensembl/reference_name.py | 6 +- pyensembl/sequence_data.py | 10 ++-- pyensembl/species.py | 1 + 11 files changed, 86 insertions(+), 77 deletions(-) diff --git a/pyensembl/__init__.py b/pyensembl/__init__.py index eeb28fb..75e8360 100644 --- a/pyensembl/__init__.py +++ b/pyensembl/__init__.py @@ -10,27 +10,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from .config import MAX_ENSEMBL_RELEASE, MAX_ENSEMBLGENOME_RELEASE from .database import Database from .download_cache import DownloadCache from .ensembl_release import EnsemblRelease, cached_release -from .ensembl_release_versions import MAX_ENSEMBL_RELEASE from .exon import Exon -from .genome import Genome from .gene import Gene +from .genome import Genome from .locus import Locus -from .reference_name import ( - ensembl_grch36, - ensembl_grch37, - ensembl_grch38, - normalize_reference_name, - find_species_by_reference, - which_reference, - genome_for_reference_name, -) - +from .reference_name import ( # ensembl_grch36,; ensembl_grch37,; ensembl_grch38, + find_species_by_reference, genome_for_reference_name, + normalize_reference_name, which_reference) from .search import find_nearest_locus from .sequence_data import SequenceData -from .species import find_species_by_name, check_species_object, normalize_species_name +from .species import (check_species_object, find_species_by_name, + normalize_species_name) from .transcript import Transcript from .version import __version__ @@ -41,6 +35,7 @@ "EnsemblRelease", "cached_release", "MAX_ENSEMBL_RELEASE", + "MAX_ENSEMBLGENOME_RELEASE", "Gene", "Transcript", "Exon", @@ -56,7 +51,7 @@ "Genome", "Locus", "Exon", - "ensembl_grch36", - "ensembl_grch37", - "ensembl_grch38", + # "ensembl_grch36", + # "ensembl_grch37", + # "ensembl_grch38", ] diff --git a/pyensembl/common.py b/pyensembl/common.py index ccc5eb1..a9a3964 100644 --- a/pyensembl/common.py +++ b/pyensembl/common.py @@ -11,7 +11,6 @@ # limitations under the License. import pickle - from functools import wraps @@ -28,10 +27,11 @@ def load_pickle(filepath): def _memoize_cache_key(args, kwargs): - """Turn args tuple and kwargs dictionary into a hashable key. + """ + Turn args tuple and kwargs dictionary into a hashable key. - Expects that all arguments to a memoized function are either hashable - or can be uniquely identified from type(arg) and repr(arg). + Expects that all arguments to a memoized function are either + hashable or can be uniquely identified from type(arg) and repr(arg). """ cache_key_list = [] @@ -51,9 +51,9 @@ def _memoize_cache_key(args, kwargs): def memoize(fn): - """Simple reset-able memoization decorator for functions and methods, - assumes that all arguments to the function can be hashed and - compared. + """ + Simple reset-able memoization decorator for functions and methods, assumes + that all arguments to the function can be hashed and compared. """ cache = {} diff --git a/pyensembl/config.py b/pyensembl/config.py index 91ae5ed..cd58605 100644 --- a/pyensembl/config.py +++ b/pyensembl/config.py @@ -152,12 +152,14 @@ "reference_assemblies": { "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), }, + "database": "plants", }, { "latin_name": "arabidopsis_thaliana", - "synonyms": ["cress", "thale_cress"], + "synonyms": ["cress", "thale_cress", "hehe"], "reference_assemblies": { "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE), }, + "database": "plants", }, ] diff --git a/pyensembl/database.py b/pyensembl/database.py index 4286908..562aa06 100644 --- a/pyensembl/database.py +++ b/pyensembl/database.py @@ -11,16 +11,16 @@ # limitations under the License. 
import logging -from os.path import split, join, exists, splitext import sqlite3 +from os.path import exists, join, split, splitext import datacache +from gtfparse import create_missing_features, read_gtf from typechecks import require_integer, require_string -from gtfparse import read_gtf, create_missing_features from .common import memoize -from .normalization import normalize_chromosome, normalize_strand from .locus import Locus +from .normalization import normalize_chromosome, normalize_strand # any time we update the database schema, increment this version number DATABASE_SCHEMA_VERSION = 3 @@ -31,9 +31,9 @@ class Database(object): """ - Wrapper around sqlite3 database so that the rest of the - library doesn't have to worry about constructing the .db file or - writing SQL queries directly. + Wrapper around sqlite3 database so that the rest of the library doesn't + have to worry about constructing the .db file or writing SQL queries + directly. """ def __init__( @@ -104,8 +104,8 @@ def local_db_path(self): def _all_possible_indices(self, column_names): """ - Create list of tuples containing all possible index groups - we might want to create over tables in this database. + Create list of tuples containing all possible index groups we might + want to create over tables in this database. If a set of genome annotations is missing some column we want to index on, we have to drop any indices which use that column. @@ -136,7 +136,8 @@ def _all_possible_indices(self, column_names): # other GTFs) if column_name not in column_set: logger.info( - "Skipping database index for {%s}", ", ".join(column_group) + "Skipping database index for {%s}", + ", ".join(column_group), ) skip = True if skip: @@ -149,7 +150,8 @@ def _all_possible_indices(self, column_names): PRIMARY_KEY_COLUMNS = {"gene": "gene_id", "transcript": "transcript_id"} def _get_primary_key(self, feature_name, feature_df): - """Name of primary key for a feature table (e.g. "gene" -> "gene_id") + """ + Name of primary key for a feature table (e.g. "gene" -> "gene_id") Since we're potentially going to run this code over unseen data, make sure that the primary is unique and never null. @@ -163,18 +165,21 @@ def _get_primary_key(self, feature_name, feature_df): if primary_key_values.isnull().any(): raise ValueError( "Column '%s' can't be primary key of table '%s'" - " because it contains nulls values" % (primary_key, feature_name) + " because it contains nulls values" + % (primary_key, feature_name) ) elif len(primary_key_values.unique()) < len(primary_key_values): raise ValueError( "Column '%s' can't be primary key of table '%s'" - " because it contains repeated values" % (primary_key, feature_name) + " because it contains repeated values" + % (primary_key, feature_name) ) else: return primary_key def _feature_indices(self, all_index_groups, primary_key, feature_df): - """Choose subset of index group tuples from `all_index_groups` which are + """ + Choose subset of index group tuples from `all_index_groups` which are applicable to a particular feature (not same as its primary key, have non-null values). """ @@ -194,9 +199,8 @@ def _feature_indices(self, all_index_groups, primary_key, feature_df): def create(self, overwrite=False): """ - Create the local database (including indexing) if it's not - already set up. If `overwrite` is True, always re-create - the database from scratch. + Create the local database (including indexing) if it's not already set + up. If `overwrite` is True, always re-create the database from scratch. 
Returns a connection to the database. """ @@ -204,7 +208,8 @@ def create(self, overwrite=False): datacache.ensure_dir(self.cache_directory_path) df = self._load_gtf_as_dataframe( - usecols=self.restrict_gtf_columns, features=self.restrict_gtf_features + usecols=self.restrict_gtf_columns, + features=self.restrict_gtf_features, ) all_index_groups = self._all_possible_indices(df.columns) @@ -261,7 +266,7 @@ def _get_connection(self): @property def connection(self): """ - Get a connection to the database or raise an exception + Get a connection to the database or raise an exception. """ connection = self._get_connection() if connection: @@ -275,6 +280,7 @@ def connection(self): def connect_or_create(self, overwrite=False): """ Return a connection to the database if it exists, otherwise create it. + Overwrite the existing database if `overwrite` is True. """ connection = self._get_connection() @@ -306,8 +312,8 @@ def column_values_at_locus( sorted=False, ): """ - Get the non-null values of a column from the database - at a particular range of loci + Get the non-null values of a column from the database at a particular + range of loci. """ # TODO: combine with the query method, since they overlap @@ -408,8 +414,8 @@ def distinct_column_values_at_locus( def run_sql_query(self, sql, required=False, query_params=[]): """ - Given an arbitrary SQL query, run it against the database - and return the results. + Given an arbitrary SQL query, run it against the database and return + the results. Parameters ---------- @@ -454,8 +460,8 @@ def query( required=False, ): """ - Construct a SQL query and run against the sqlite3 database, - filtered both by the feature type and a user-provided column/value. + Construct a SQL query and run against the sqlite3 database, filtered + both by the feature type and a user-provided column/value. """ sql = """ SELECT %s%s @@ -468,7 +474,9 @@ def query( filter_column, ) query_params = [filter_value] - return self.run_sql_query(sql, required=required, query_params=query_params) + return self.run_sql_query( + sql, required=required, query_params=query_params + ) def query_one( self, @@ -490,7 +498,9 @@ def query_one( if len(results) == 0: if required: - raise ValueError("%s not found: %s" % (filter_column, filter_value)) + raise ValueError( + "%s not found: %s" % (filter_column, filter_value) + ) else: return None elif len(results) > 1: @@ -505,8 +515,8 @@ def query_feature_values( self, column, feature, distinct=True, contig=None, strand=None ): """ - Run a SQL query against the sqlite3 database, filtered - only on the feature type. + Run a SQL query against the sqlite3 database, filtered only on the + feature type. """ query = """ SELECT %s%s @@ -541,7 +551,6 @@ def query_loci(self, filter_column, filter_value, feature): """ Query for loci satisfying a given filter and feature type. - Parameters ---------- filter_column : str @@ -571,8 +580,8 @@ def query_loci(self, filter_column, filter_value, feature): def query_locus(self, filter_column, filter_value, feature): """ - Query for unique locus, raises error if missing or more than - one locus in the database. + Query for unique locus, raises error if missing or more than one locus + in the database. Parameters ---------- @@ -588,7 +597,9 @@ def query_locus(self, filter_column, filter_value, feature): Returns single Locus object. 
""" loci = self.query_loci( - filter_column=filter_column, filter_value=filter_value, feature=feature + filter_column=filter_column, + filter_value=filter_value, + feature=feature, ) if len(loci) == 0: @@ -605,7 +616,7 @@ def query_locus(self, filter_column, filter_value, feature): def _load_gtf_as_dataframe(self, usecols=None, features=None): """ - Parse this genome source's GTF file and load it as a Pandas DataFrame + Parse this genome source's GTF file and load it as a Pandas DataFrame. """ logger.info("Reading GTF from %s", self.gtf_path) df = read_gtf( @@ -621,7 +632,9 @@ def _load_gtf_as_dataframe(self, usecols=None, features=None): column_names = set(df.keys()) expect_gene_feature = features is None or "gene" in features - expect_transcript_feature = features is None or "transcript" in features + expect_transcript_feature = ( + features is None or "transcript" in features + ) observed_features = set(df["feature"]) # older Ensembl releases don't have "gene" or "transcript" @@ -635,7 +648,9 @@ def _load_gtf_as_dataframe(self, usecols=None, features=None): dataframe=df, unique_keys={"gene": "gene_id"}, extra_columns={ - "gene": {"gene_name", "gene_biotype"}.intersection(column_names), + "gene": {"gene_name", "gene_biotype"}.intersection( + column_names + ), }, missing_value="", ) diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py index 9a5bc46..d31612a 100644 --- a/pyensembl/ensembl_release_versions.py +++ b/pyensembl/ensembl_release_versions.py @@ -10,12 +10,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .config import ( - MAX_ENSEMBL_RELEASE, - MAX_ENSEMBLGENOME_RELEASE, - MIN_ENSEMBL_RELEASE, - MIN_ENSEMBLGENOME_RELEASE, -) +from .config import (MAX_ENSEMBL_RELEASE, MAX_ENSEMBLGENOME_RELEASE, + MIN_ENSEMBL_RELEASE, MIN_ENSEMBLGENOME_RELEASE) def check_release_number(release, database=None): diff --git a/pyensembl/fasta.py b/pyensembl/fasta.py index e339a8a..3e8ba92 100644 --- a/pyensembl/fasta.py +++ b/pyensembl/fasta.py @@ -19,9 +19,8 @@ """ -from gzip import GzipFile import logging - +from gzip import GzipFile logger = logging.getLogger(__name__) diff --git a/pyensembl/genome.py b/pyensembl/genome.py index e56dd2e..355841b 100644 --- a/pyensembl/genome.py +++ b/pyensembl/genome.py @@ -21,8 +21,8 @@ from serializable import Serializable -from .download_cache import DownloadCache from .database import Database +from .download_cache import DownloadCache from .exon import Exon from .gene import Gene from .sequence_data import SequenceData diff --git a/pyensembl/normalization.py b/pyensembl/normalization.py index fb0cc33..81f65c5 100644 --- a/pyensembl/normalization.py +++ b/pyensembl/normalization.py @@ -11,7 +11,8 @@ # limitations under the License. from sys import intern -from typechecks import is_string, is_integer + +from typechecks import is_integer, is_string # Manually memoizing here, since our simple common.memoize function has # noticable overhead in this instance. 
diff --git a/pyensembl/reference_name.py b/pyensembl/reference_name.py index 1b7639d..dbb8d1f 100644 --- a/pyensembl/reference_name.py +++ b/pyensembl/reference_name.py @@ -70,6 +70,6 @@ def genome_for_reference_name(reference_name, allow_older_downloaded_release=Tru return EnsemblRelease.cached(release=max_ensembl_release, species=species) -ensembl_grch36 = genome_for_reference_name("ncbi36") -ensembl_grch37 = genome_for_reference_name("grch37") -ensembl_grch38 = genome_for_reference_name("grch38") +# ensembl_grch36 = genome_for_reference_name("ncbi36") +# ensembl_grch37 = genome_for_reference_name("grch37") +# ensembl_grch38 = genome_for_reference_name("grch38") diff --git a/pyensembl/sequence_data.py b/pyensembl/sequence_data.py index 631c748..c2a6e0d 100644 --- a/pyensembl/sequence_data.py +++ b/pyensembl/sequence_data.py @@ -10,14 +10,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from os import remove -from os.path import exists, abspath, split, join import logging -from collections import Counter import pickle -from .common import load_pickle, dump_pickle -from .fasta import parse_fasta_dictionary +from collections import Counter +from os import remove +from os.path import abspath, exists, join, split +from .common import dump_pickle, load_pickle +from .fasta import parse_fasta_dictionary logger = logging.getLogger(__name__) diff --git a/pyensembl/species.py b/pyensembl/species.py index fe8f3b0..ca477e7 100644 --- a/pyensembl/species.py +++ b/pyensembl/species.py @@ -205,4 +205,5 @@ def check_species_object(species_name_or_object): latin_name=data["latin_name"], synonyms=data["synonyms"], reference_assemblies=data["reference_assemblies"], + database=data.get("database", None), ) From 9ead6571713ee972acb259558ebe2e3467b34c6e Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 23:20:41 -0600 Subject: [PATCH 28/35] update more species --- pyensembl/config.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/pyensembl/config.py b/pyensembl/config.py index cd58605..faaa3a5 100644 --- a/pyensembl/config.py +++ b/pyensembl/config.py @@ -146,19 +146,35 @@ "synonyms": ["yeast", "budding_yeast"], "reference_assemblies": {"R64-1-1": (75, MAX_ENSEMBL_RELEASE)}, }, + { + "latin_name": "arabidopsis_thaliana", + "synonyms": ["cress", "thale_cress", "hehe"], + "reference_assemblies": { + "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE), + }, + "database": "plants", + }, { "latin_name": "oryza_sativa", - "synonyms": ["rice", "japanese_rice"], + "synonyms": ["rice"], "reference_assemblies": { "IRGSP-1.0": (55, MAX_ENSEMBLGENOME_RELEASE), }, "database": "plants", }, { - "latin_name": "arabidopsis_thaliana", - "synonyms": ["cress", "thale_cress", "hehe"], + "latin_name": "zea_mays", + "synonyms": ["maize"], "reference_assemblies": { - "TAIR10": (55, MAX_ENSEMBLGENOME_RELEASE), + "Zm-B73-REFERENCE-NAM-5.0": (55, MAX_ENSEMBLGENOME_RELEASE), + }, + "database": "plants", + }, + { + "latin_name": "glycine_max", + "synonyms": ["soybean"], + "reference_assemblies": { + "Glycine_max_v2.1": (55, MAX_ENSEMBLGENOME_RELEASE), }, "database": "plants", }, From 41612a03a8a6f21f8ac6e239dde76bddf8ed42fb Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 23:43:37 -0600 Subject: [PATCH 29/35] format code --- docs/conf.py | 8 +- pyensembl/__init__.py | 14 +- pyensembl/download_cache.py | 19 ++- pyensembl/ensembl_release_versions.py | 14 +- pyensembl/exon.py | 4 +- pyensembl/fasta.py | 3 +- 
pyensembl/gene.py | 7 +- pyensembl/genome.py | 204 +++++++++++++++++--------- pyensembl/locus.py | 25 +++- pyensembl/reference_name.py | 12 +- pyensembl/sequence_data.py | 12 +- pyensembl/transcript.py | 33 +++-- tests/common.py | 4 +- tests/data.py | 8 +- tests/test_download_cache.py | 6 +- tests/test_exon_id.py | 6 +- tests/test_exon_object.py | 12 +- tests/test_gene_ids.py | 8 +- tests/test_gene_names.py | 5 +- tests/test_gene_objects.py | 16 +- tests/test_id_length.py | 4 +- tests/test_missing_genome_sources.py | 25 +++- tests/test_mouse.py | 5 +- tests/test_release_versions.py | 4 +- tests/test_search.py | 8 +- tests/test_sequence_data.py | 20 ++- tests/test_serialization.py | 8 +- tests/test_shell.py | 4 +- tests/test_timings.py | 12 +- tests/test_transcript_ids.py | 7 +- tests/test_transcript_objects.py | 38 +++-- tests/test_ucsc_gtf.py | 24 ++- 32 files changed, 412 insertions(+), 167 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1c4034e..aefddaa 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -220,7 +220,13 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, "pyensembl.tex", "pyensembl Documentation", "Hammer Lab", "manual"), + ( + master_doc, + "pyensembl.tex", + "pyensembl Documentation", + "Hammer Lab", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/pyensembl/__init__.py b/pyensembl/__init__.py index 75e8360..991af8c 100644 --- a/pyensembl/__init__.py +++ b/pyensembl/__init__.py @@ -19,12 +19,18 @@ from .genome import Genome from .locus import Locus from .reference_name import ( # ensembl_grch36,; ensembl_grch37,; ensembl_grch38, - find_species_by_reference, genome_for_reference_name, - normalize_reference_name, which_reference) + find_species_by_reference, + genome_for_reference_name, + normalize_reference_name, + which_reference, +) from .search import find_nearest_locus from .sequence_data import SequenceData -from .species import (check_species_object, find_species_by_name, - normalize_species_name) +from .species import ( + check_species_object, + find_species_by_name, + normalize_species_name, +) from .transcript import Transcript from .version import __version__ diff --git a/pyensembl/download_cache.py b/pyensembl/download_cache.py index 47a0766..48ebd00 100644 --- a/pyensembl/download_cache.py +++ b/pyensembl/download_cache.py @@ -151,7 +151,10 @@ def _fields(self): ) def __eq__(self, other): - return other.__class__ is DownloadCache and self._fields() == other._fields() + return ( + other.__class__ is DownloadCache + and self._fields() == other._fields() + ) def __hash__(self): return hash(self._fields()) @@ -213,10 +216,14 @@ def cached_path(self, path_or_url): # if we expect the download function to decompress this file then # we should use its name without the compression extension if self.decompress_on_download: - local_filename = self._remove_compression_suffix_if_present(local_filename) + local_filename = self._remove_compression_suffix_if_present( + local_filename + ) if len(local_filename) == 0: - raise ValueError("Can't determine local filename for %s" % (path_or_url,)) + raise ValueError( + "Can't determine local filename for %s" % (path_or_url,) + ) return join(self.cache_directory_path, local_filename) @@ -319,9 +326,9 @@ def delete_cached_files(self, prefixes=[], suffixes=[]): """ if isdir(self.cache_directory_path): for filename in listdir(): - delete = any([filename.endswith(ext) for ext in 
suffixes]) or any( - [filename.startswith(pre) for pre in prefixes] - ) + delete = any( + [filename.endswith(ext) for ext in suffixes] + ) or any([filename.startswith(pre) for pre in prefixes]) if delete: path = join(self.cache_directory_path, filename) logger.info("Deleting %s", path) diff --git a/pyensembl/ensembl_release_versions.py b/pyensembl/ensembl_release_versions.py index d31612a..246a380 100644 --- a/pyensembl/ensembl_release_versions.py +++ b/pyensembl/ensembl_release_versions.py @@ -10,8 +10,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .config import (MAX_ENSEMBL_RELEASE, MAX_ENSEMBLGENOME_RELEASE, - MIN_ENSEMBL_RELEASE, MIN_ENSEMBLGENOME_RELEASE) +from .config import ( + MAX_ENSEMBL_RELEASE, + MAX_ENSEMBLGENOME_RELEASE, + MIN_ENSEMBL_RELEASE, + MIN_ENSEMBLGENOME_RELEASE, +) def check_release_number(release, database=None): @@ -19,7 +23,11 @@ def check_release_number(release, database=None): Check to make sure a release is in the valid range of Ensembl releases. """ if release is None: - return MAX_ENSEMBL_RELEASE if database is None else MAX_ENSEMBLGENOME_RELEASE + return ( + MAX_ENSEMBL_RELEASE + if database is None + else MAX_ENSEMBLGENOME_RELEASE + ) try: release = int(release) except ValueError: diff --git a/pyensembl/exon.py b/pyensembl/exon.py index a520290..a84b75f 100644 --- a/pyensembl/exon.py +++ b/pyensembl/exon.py @@ -15,7 +15,9 @@ class Exon(Locus): - def __init__(self, exon_id, contig, start, end, strand, gene_name, gene_id): + def __init__( + self, exon_id, contig, start, end, strand, gene_name, gene_id + ): Locus.__init__(self, contig, start, end, strand) self.exon_id = exon_id self.gene_name = gene_name diff --git a/pyensembl/fasta.py b/pyensembl/fasta.py index 3e8ba92..b55750b 100644 --- a/pyensembl/fasta.py +++ b/pyensembl/fasta.py @@ -32,7 +32,8 @@ def _parse_header_id(line): """ if type(line) is not bytes: raise TypeError( - "Expected header line to be of type %s but got %s" % (bytes, type(line)) + "Expected header line to be of type %s but got %s" + % (bytes, type(line)) ) if len(line) <= 1: diff --git a/pyensembl/gene.py b/pyensembl/gene.py index f26de48..b787c64 100644 --- a/pyensembl/gene.py +++ b/pyensembl/gene.py @@ -17,7 +17,9 @@ class Gene(LocusWithGenome): - def __init__(self, gene_id, gene_name, contig, start, end, strand, biotype, genome): + def __init__( + self, gene_id, gene_name, contig, start, end, strand, biotype, genome + ): LocusWithGenome.__init__( self, contig=contig, @@ -98,7 +100,8 @@ def transcripts(self): # its particular information, might be more efficient if we # just get all the columns here, but how do we keep that modular? return [ - self.genome.transcript_by_id(result[0]) for result in transcript_id_results + self.genome.transcript_by_id(result[0]) + for result in transcript_id_results ] @memoized_property diff --git a/pyensembl/genome.py b/pyensembl/genome.py index 355841b..a5e202d 100644 --- a/pyensembl/genome.py +++ b/pyensembl/genome.py @@ -11,8 +11,8 @@ # limitations under the License. """ -Contains the Genome class, with its millions of accessors and wrappers -around an arbitrary genomic database. +Contains the Genome class, with its millions of accessors and wrappers around +an arbitrary genomic database. """ @@ -31,8 +31,8 @@ class Genome(Serializable): """ - Bundles together the genomic annotation and sequence data associated with - a particular genomic database source (e.g. 
a single Ensembl release) and + Bundles together the genomic annotation and sequence data associated with a + particular genomic database source (e.g. a single Ensembl release) and provides a wide variety of helper methods for accessing this data. """ @@ -148,7 +148,7 @@ def to_dict(self): def _init_lazy_fields(self): """ - Member data that gets loaded or constructed on demand + Member data that gets loaded or constructed on demand. """ self.gtf_path = None self._protein_sequences = None @@ -163,11 +163,15 @@ def _init_lazy_fields(self): self._exons = {} def _get_cached_path( - self, field_name, path_or_url, download_if_missing=False, overwrite=False + self, + field_name, + path_or_url, + download_if_missing=False, + overwrite=False, ): """ - Get the local path for a possibly remote file, invoking either - a download or install error message if it's missing. + Get the local path for a possibly remote file, invoking either a + download or install error message if it's missing. """ if len(field_name) == 0: raise ValueError("Expected non-empty field name") @@ -188,7 +192,9 @@ def _get_gtf_path(self, download_if_missing=False, overwrite=False): overwrite=overwrite, ) - def _get_transcript_fasta_paths(self, download_if_missing=False, overwrite=False): + def _get_transcript_fasta_paths( + self, download_if_missing=False, overwrite=False + ): if not self.requires_transcript_fasta: raise ValueError("No transcript FASTA source for %s" % self) return [ @@ -201,7 +207,9 @@ def _get_transcript_fasta_paths(self, download_if_missing=False, overwrite=False for path in self._transcript_fasta_paths_or_urls ] - def _get_protein_fasta_paths(self, download_if_missing=False, overwrite=False): + def _get_protein_fasta_paths( + self, download_if_missing=False, overwrite=False + ): # get the path for peptide FASTA files containing # this genome's protein sequences if not self.requires_protein_fasta: @@ -233,7 +241,9 @@ def _set_local_paths(self, download_if_missing=True, overwrite=False): def required_local_files(self): paths = [] if self._gtf_path_or_url: - paths.append(self.download_cache.cached_path(self._gtf_path_or_url)) + paths.append( + self.download_cache.cached_path(self._gtf_path_or_url) + ) if self._transcript_fasta_paths_or_urls: paths.extend( [ @@ -273,8 +283,8 @@ def download(self, overwrite=False): def index(self, overwrite=False): """ Assuming that all necessary data for this Genome has been downloaded, - generate the GTF database and save efficient representation of - FASTA sequence files. + generate the GTF database and save efficient representation of FASTA + sequence files. 
""" if self.requires_gtf: self.db.connect_or_create(overwrite=overwrite) @@ -295,7 +305,9 @@ def db(self): overwrite=False, ) if self.gtf_path is None: - raise ValueError("Property 'gtf_path' of %s cannot be None" % self) + raise ValueError( + "Property 'gtf_path' of %s cannot be None" % self + ) # Database object turns the GTF dataframes into sqlite3 tables # and wraps them with methods like `query_one` @@ -348,7 +360,8 @@ def protein_sequences(self): self._set_local_paths(download_if_missing=False, overwrite=False) if self.protein_fasta_paths is None: raise ValueError( - "Property 'protein_fasta_paths' of %s cannot be None" % self + "Property 'protein_fasta_paths' of %s cannot be None" + % self ) self._protein_sequences = SequenceData( fasta_paths=self.protein_fasta_paths, @@ -360,13 +373,16 @@ def protein_sequences(self): def transcript_sequences(self): if self._transcript_sequences is None: if not self.requires_transcript_fasta: - raise ValueError("Missing transcript FASTA source for %s" % self) + raise ValueError( + "Missing transcript FASTA source for %s" % self + ) # make sure transcript FASTA file exists locally # and populate self.transcript_fasta_paths self._set_local_paths(download_if_missing=False, overwrite=False) if self.transcript_fasta_paths is None: raise ValueError( - "Property 'transcript_fasta_paths' of %s cannot be None" % (self,) + "Property 'transcript_fasta_paths' of %s cannot be None" + % (self,) ) self._transcript_sequences = SequenceData( fasta_paths=self.transcript_fasta_paths, @@ -376,8 +392,8 @@ def transcript_sequences(self): def install_string(self): """ - Add every missing file to the install string shown to the user - in an error message. + Add every missing file to the install string shown to the user in an + error message. """ args = [ "--reference-name", @@ -451,7 +467,7 @@ def __hash__(self): def clear_cache(self): """ - Clear any in-memory cached values + Clear any in-memory cached values. """ for maybe_fn in self.__dict__.values(): # clear cache associated with all memoization decorators, @@ -461,7 +477,7 @@ def clear_cache(self): def delete_index_files(self): """ - Delete all data aside from source GTF and FASTA files + Delete all data aside from source GTF and FASTA files. """ self.clear_cache() db_path = self.db.local_db_path() @@ -472,9 +488,8 @@ def _all_feature_values( self, column, feature, distinct=True, contig=None, strand=None ): """ - Cached lookup of all values for a particular feature property from - the database, caches repeated queries in memory and - stores them as a CSV. + Cached lookup of all values for a particular feature property from the + database, caches repeated queries in memory and stores them as a CSV. Parameters ---------- @@ -505,23 +520,31 @@ def _all_feature_values( ) def transcript_sequence(self, transcript_id): - """Return cDNA nucleotide sequence of transcript, or None if - transcript doesn't have cDNA sequence. + """ + Return cDNA nucleotide sequence of transcript, or None if transcript + doesn't have cDNA sequence. """ if self.transcript_sequences is None: - raise ValueError("No transcript FASTA supplied to this Genome: %s" % self) + raise ValueError( + "No transcript FASTA supplied to this Genome: %s" % self + ) return self.transcript_sequences.get(transcript_id) def protein_sequence(self, protein_id): - """Return cDNA nucleotide sequence of transcript, or None if - transcript doesn't have cDNA sequence. + """ + Return cDNA nucleotide sequence of transcript, or None if transcript + doesn't have cDNA sequence. 
""" if self.protein_sequences is None: - raise ValueError("No protein FASTA supplied to this Genome: %s" % self) + raise ValueError( + "No protein FASTA supplied to this Genome: %s" % self + ) return self.protein_sequences.get(protein_id) def genes_at_locus(self, contig, position, end=None, strand=None): - gene_ids = self.gene_ids_at_locus(contig, position, end=end, strand=strand) + gene_ids = self.gene_ids_at_locus( + contig, position, end=end, strand=strand + ) return [self.gene_by_id(gene_id) for gene_id in gene_ids] def transcripts_at_locus(self, contig, position, end=None, strand=None): @@ -529,11 +552,14 @@ def transcripts_at_locus(self, contig, position, end=None, strand=None): contig, position, end=end, strand=strand ) return [ - self.transcript_by_id(transcript_id) for transcript_id in transcript_ids + self.transcript_by_id(transcript_id) + for transcript_id in transcript_ids ] def exons_at_locus(self, contig, position, end=None, strand=None): - exon_ids = self.exon_ids_at_locus(contig, position, end=end, strand=strand) + exon_ids = self.exon_ids_at_locus( + contig, position, end=end, strand=strand + ) return [self.exon_by_id(exon_id) for exon_id in exon_ids] def gene_ids_at_locus(self, contig, position, end=None, strand=None): @@ -576,7 +602,9 @@ def transcript_ids_at_locus(self, contig, position, end=None, strand=None): strand=strand, ) - def transcript_names_at_locus(self, contig, position, end=None, strand=None): + def transcript_names_at_locus( + self, contig, position, end=None, strand=None + ): return self.db.distinct_column_values_at_locus( column="transcript_name", feature="transcript", @@ -606,7 +634,7 @@ def protein_ids_at_locus(self, contig, position, end=None, strand=None): def locus_of_gene_id(self, gene_id): """ - Given a gene ID returns Locus with: chromosome, start, stop, strand + Given a gene ID returns Locus with: chromosome, start, stop, strand. """ return self.db.query_locus( filter_column="gene_id", filter_value=gene_id, feature="gene" @@ -615,9 +643,9 @@ def locus_of_gene_id(self, gene_id): def loci_of_gene_names(self, gene_name): """ Given a gene name returns list of Locus objects with fields: - chromosome, start, stop, strand - You can get multiple results since a gene might have multiple copies - in the genome. + + chromosome, start, stop, strand You can get multiple results + since a gene might have multiple copies in the genome. """ return self.db.query_loci("gene_name", gene_name, "gene") @@ -630,7 +658,7 @@ def locus_of_transcript_id(self, transcript_id): def locus_of_exon_id(self, exon_id): """ - Given an exon ID returns Locus + Given an exon ID returns Locus. 
""" return self.db.query_locus("exon_id", exon_id, feature="exon") @@ -642,8 +670,8 @@ def locus_of_exon_id(self, exon_id): def contigs(self): """ - Returns all contig names for any gene in the genome - (field called "seqname" in Ensembl GTF files) + Returns all contig names for any gene in the genome (field called + "seqname" in Ensembl GTF files) """ return self.db.query_feature_values("seqname", "gene") @@ -704,7 +732,9 @@ def gene_by_id(self, gene_id): gene_name, gene_biotype = None, None if len(result) < 4 or len(result) > 6: - raise ValueError("Result is not the expected length: %d" % len(result)) + raise ValueError( + "Result is not the expected length: %d" % len(result) + ) contig, start, end, strand = result[:4] if len(result) == 5: if "gene_name" in field_names: @@ -738,8 +768,8 @@ def genes_by_name(self, gene_name): def gene_by_protein_id(self, protein_id): """ - Get the gene ID associated with the given protein ID, - return its Gene object + Get the gene ID associated with the given protein ID, return its Gene + object. """ gene_id = self.gene_id_of_protein_id(protein_id) return self.gene_by_id(gene_id) @@ -763,8 +793,8 @@ def _query_gene_name(self, property_name, property_value, feature_type): def gene_names(self, contig=None, strand=None): """ - Return all genes in the database, - optionally restrict to a chromosome and/or strand. + Return all genes in the database, optionally restrict to a chromosome + and/or strand. """ return self._all_feature_values( column="gene_name", feature="gene", contig=contig, strand=strand @@ -774,10 +804,14 @@ def gene_name_of_gene_id(self, gene_id): return self._query_gene_name("gene_id", gene_id, "gene") def gene_name_of_transcript_id(self, transcript_id): - return self._query_gene_name("transcript_id", transcript_id, "transcript") + return self._query_gene_name( + "transcript_id", transcript_id, "transcript" + ) def gene_name_of_transcript_name(self, transcript_name): - return self._query_gene_name("transcript_name", transcript_name, "transcript") + return self._query_gene_name( + "transcript_name", transcript_name, "transcript" + ) def gene_name_of_exon_id(self, exon_id): return self._query_gene_name("exon_id", exon_id, "exon") @@ -801,8 +835,8 @@ def _query_gene_ids(self, property_name, value, feature="gene"): def gene_ids(self, contig=None, strand=None): """ - What are all the gene IDs - (optionally restrict to a given chromosome/contig and/or strand) + What are all the gene IDs (optionally restrict to a given + chromosome/contig and/or strand) """ return self._all_feature_values( column="gene_id", feature="gene", contig=contig, strand=strand @@ -811,6 +845,7 @@ def gene_ids(self, contig=None, strand=None): def gene_ids_of_gene_name(self, gene_name): """ What are the gene IDs associated with a given gene name? + (due to copy events, there might be multiple genes per name) """ results = self._query_gene_ids("gene_name", gene_name) @@ -843,17 +878,21 @@ def gene_id_of_protein_id(self, protein_id): def transcripts(self, contig=None, strand=None): """ - Construct Transcript object for every transcript entry in - the database. Optionally restrict to a particular - chromosome using the `contig` argument. + Construct Transcript object for every transcript entry in the database. + + Optionally restrict to a particular chromosome using the + `contig` argument. 
""" transcript_ids = self.transcript_ids(contig=contig, strand=strand) return [ - self.transcript_by_id(transcript_id) for transcript_id in transcript_ids + self.transcript_by_id(transcript_id) + for transcript_id in transcript_ids ] def transcript_by_id(self, transcript_id): - """Construct Transcript object with given transcript ID""" + """ + Construct Transcript object with given transcript ID. + """ if transcript_id not in self._transcripts: optional_field_names = [ "transcript_name", @@ -886,8 +925,12 @@ def transcript_by_id(self, transcript_id): raise ValueError("Transcript not found: %s" % (transcript_id,)) transcript_name, transcript_biotype, tsl = None, None, None - if len(result) < 5 or len(result) > (5 + len(optional_field_names)): - raise ValueError("Result is not the expected length: %d" % len(result)) + if len(result) < 5 or len(result) > ( + 5 + len(optional_field_names) + ): + raise ValueError( + "Result is not the expected length: %d" % len(result) + ) contig, start, end, strand, gene_id = result[:5] if len(result) > 5: extra_field_names = [ @@ -920,9 +963,12 @@ def transcript_by_id(self, transcript_id): return self._transcripts[transcript_id] def transcripts_by_name(self, transcript_name): - transcript_ids = self.transcript_ids_of_transcript_name(transcript_name) + transcript_ids = self.transcript_ids_of_transcript_name( + transcript_name + ) return [ - self.transcript_by_id(transcript_id) for transcript_id in transcript_ids + self.transcript_by_id(transcript_id) + for transcript_id in transcript_ids ] def transcript_by_protein_id(self, protein_id): @@ -948,25 +994,31 @@ def _query_transcript_names(self, property_name, value): def transcript_names(self, contig=None, strand=None): """ - What are all the transcript names in the database - (optionally, restrict to a given chromosome and/or strand) + What are all the transcript names in the database (optionally, restrict + to a given chromosome and/or strand) """ return self._all_feature_values( - column="transcript_name", feature="transcript", contig=contig, strand=strand + column="transcript_name", + feature="transcript", + contig=contig, + strand=strand, ) def transcript_names_of_gene_name(self, gene_name): return self._query_transcript_names("gene_name", gene_name) def transcript_name_of_transcript_id(self, transcript_id): - transcript_names = self._query_transcript_names("transcript_id", transcript_id) + transcript_names = self._query_transcript_names( + "transcript_id", transcript_id + ) if len(transcript_names) == 0: raise ValueError( "No transcript names for transcript ID = %s" % transcript_id ) elif len(transcript_names) > 1: raise ValueError( - "Multiple transcript names for transcript ID = %s" % (transcript_id,) + "Multiple transcript names for transcript ID = %s" + % (transcript_id,) ) return transcript_names[0] @@ -976,7 +1028,9 @@ def transcript_name_of_transcript_id(self, transcript_id): # ################################################### - def _query_transcript_ids(self, property_name, value, feature="transcript"): + def _query_transcript_ids( + self, property_name, value, feature="transcript" + ): results = self.db.query( select_column_names=["transcript_id"], filter_column=property_name, @@ -989,7 +1043,10 @@ def _query_transcript_ids(self, property_name, value, feature="transcript"): def transcript_ids(self, contig=None, strand=None): return self._all_feature_values( - column="transcript_id", feature="transcript", contig=contig, strand=strand + column="transcript_id", + feature="transcript", + 
contig=contig, + strand=strand, ) def transcript_ids_of_gene_id(self, gene_id): @@ -1008,7 +1065,9 @@ def transcript_id_of_protein_id(self, protein_id): """ What is the transcript ID associated with a given protein ID? """ - results = self._query_transcript_ids("protein_id", protein_id, feature="CDS") + results = self._query_transcript_ids( + "protein_id", protein_id, feature="CDS" + ) if len(results) == 0: raise ValueError("Protein ID not found: %s" % protein_id) elif len(results) > 1: @@ -1029,15 +1088,16 @@ def transcript_id_of_protein_id(self, protein_id): def exons(self, contig=None, strand=None): """ - Create exon object for all exons in the database, optionally - restrict to a particular chromosome using the `contig` argument. + Create exon object for all exons in the database, optionally restrict + to a particular chromosome using the `contig` argument. """ # DataFrame with single column called "exon_id" exon_ids = self.exon_ids(contig=contig, strand=strand) return [self.exon_by_id(exon_id) for exon_id in exon_ids] def exon_by_id(self, exon_id): - """Construct an Exon object from its ID by looking up the exon"s + """ + Construct an Exon object from its ID by looking up the exon"s properties in the given Database. """ if exon_id not in self._exons: @@ -1112,8 +1172,8 @@ def exon_ids_of_transcript_id(self, transcript_id): def protein_ids(self, contig=None, strand=None): """ - What are all the protein IDs - (optionally restrict to a given chromosome and/or strand) + What are all the protein IDs (optionally restrict to a given chromosome + and/or strand) """ protein_ids = self._all_feature_values( column="protein_id", diff --git a/pyensembl/locus.py b/pyensembl/locus.py index b88b4a3..c087183 100644 --- a/pyensembl/locus.py +++ b/pyensembl/locus.py @@ -49,7 +49,8 @@ def __init__(self, contig, start, end, strand): if end < start: raise ValueError( - "Expected start <= end, got start = %d, end = %d" % (start, end) + "Expected start <= end, got start = %d, end = %d" + % (start, end) ) self.start = start self.end = end @@ -149,7 +150,9 @@ def offset_range(self, start, end): ) if start < self.start or end > self.end: - raise ValueError("Range (%d, %d) falls outside %s" % (start, end, self)) + raise ValueError( + "Range (%d, %d) falls outside %s" % (start, end, self) + ) if self.on_forward_strand: return (start - self.start, end - self.start) @@ -183,7 +186,9 @@ def can_overlap(self, contig, strand=None): """ Is this locus on the same contig and (optionally) on the same strand? 
""" - return self.on_contig(contig) and (strand is None or self.on_strand(strand)) + return self.on_contig(contig) and ( + strand is None or self.on_strand(strand) + ) def distance_to_interval(self, start, end): """ @@ -220,15 +225,23 @@ def overlaps(self, contig, start, end, strand=None): def overlaps_locus(self, other_locus): return self.overlaps( - other_locus.contig, other_locus.start, other_locus.end, other_locus.strand + other_locus.contig, + other_locus.start, + other_locus.end, + other_locus.strand, ) def contains(self, contig, start, end, strand=None): return ( - self.can_overlap(contig, strand) and start >= self.start and end <= self.end + self.can_overlap(contig, strand) + and start >= self.start + and end <= self.end ) def contains_locus(self, other_locus): return self.contains( - other_locus.contig, other_locus.start, other_locus.end, other_locus.strand + other_locus.contig, + other_locus.start, + other_locus.end, + other_locus.strand, ) diff --git a/pyensembl/reference_name.py b/pyensembl/reference_name.py index dbb8d1f..5731d80 100644 --- a/pyensembl/reference_name.py +++ b/pyensembl/reference_name.py @@ -29,7 +29,9 @@ def normalize_reference_name(name): def find_species_by_reference(reference_name): - return Species._reference_names_to_species[normalize_reference_name(reference_name)] + return Species._reference_names_to_species[ + normalize_reference_name(reference_name) + ] def which_reference(species_name, ensembl_release): @@ -42,7 +44,9 @@ def max_ensembl_release(reference_name): return max_release -def genome_for_reference_name(reference_name, allow_older_downloaded_release=True): +def genome_for_reference_name( + reference_name, allow_older_downloaded_release=True +): """ Given a genome reference name, such as "GRCh38", returns the corresponding Ensembl Release object. 
@@ -60,7 +64,9 @@ def genome_for_reference_name(reference_name, allow_older_downloaded_release=Tru ] if allow_older_downloaded_release: # go through candidate releases in descending order - for release in reversed(range(min_ensembl_release, max_ensembl_release + 1)): + for release in reversed( + range(min_ensembl_release, max_ensembl_release + 1) + ): # check if release has been locally downloaded candidate = EnsemblRelease.cached(release=release, species=species) if candidate.required_local_files_exist(): diff --git a/pyensembl/sequence_data.py b/pyensembl/sequence_data.py index c2a6e0d..e18a9e8 100644 --- a/pyensembl/sequence_data.py +++ b/pyensembl/sequence_data.py @@ -32,10 +32,14 @@ def __init__(self, fasta_paths, cache_directory_path=None): fasta_paths = [fasta_paths] self.fasta_paths = [abspath(path) for path in fasta_paths] - self.fasta_directory_paths = [split(path)[0] for path in self.fasta_paths] + self.fasta_directory_paths = [ + split(path)[0] for path in self.fasta_paths + ] self.fasta_filenames = [split(path)[1] for path in self.fasta_paths] if cache_directory_path: - self.cache_directory_paths = [cache_directory_path] * len(self.fasta_paths) + self.cache_directory_paths = [cache_directory_path] * len( + self.fasta_paths + ) else: self.cache_directory_paths = self.fasta_directory_paths for path in self.fasta_paths: @@ -104,7 +108,9 @@ def _load_or_create_fasta_dictionary_pickle(self): try: fasta_dictionary_tmp = load_pickle(pickle_path) self._add_to_fasta_dictionary(fasta_dictionary_tmp) - logger.info("Loaded sequence dictionary from %s", pickle_path) + logger.info( + "Loaded sequence dictionary from %s", pickle_path + ) continue except (pickle.UnpicklingError, AttributeError): # catch either an UnpicklingError or an AttributeError diff --git a/pyensembl/transcript.py b/pyensembl/transcript.py index 012a152..694e702 100644 --- a/pyensembl/transcript.py +++ b/pyensembl/transcript.py @@ -126,7 +126,10 @@ def exons(self): # in each transcript columns = ["exon_number", "exon_id"] exon_numbers_and_ids = self.db.query( - columns, filter_column="transcript_id", filter_value=self.id, feature="exon" + columns, + filter_column="transcript_id", + filter_value=self.id, + feature="exon", ) # fill this list in its correct order (by exon_number) by using @@ -137,7 +140,8 @@ def exons(self): exon = self.genome.exon_by_id(exon_id) if exon is None: raise ValueError( - "Missing exon %s for transcript %s" % (exon_number, self.id) + "Missing exon %s for transcript %s" + % (exon_number, self.id) ) exon_number = int(exon_number) if exon_number < 1: @@ -174,7 +178,8 @@ def _transcript_feature_position_ranges(self, feature, required=True): if required and len(results) == 0: raise ValueError( - "Transcript %s does not contain feature %s" % (self.id, feature) + "Transcript %s does not contain feature %s" + % (self.id, feature) ) return results @@ -183,7 +188,9 @@ def _transcript_feature_positions(self, feature): """ Get unique positions for feature, raise an error if feature is absent. """ - ranges = self._transcript_feature_position_ranges(feature, required=True) + ranges = self._transcript_feature_position_ranges( + feature, required=True + ) results = [] # a feature (such as a stop codon), maybe be split over multiple # contiguous ranges. 
Collect all the nucleotide positions into a @@ -329,7 +336,9 @@ def spliced_offset(self, position): exon_offset = unspliced_offset - exon_unspliced_start return total_spliced_offset + exon_offset else: - exon_length = len(exon) # exon_end_position - exon_start_position + 1 + exon_length = len( + exon + ) # exon_end_position - exon_start_position + 1 total_spliced_offset += exon_length raise ValueError( "Couldn't find position %d on any exon of %s" % (position, self.id) @@ -341,7 +350,9 @@ def start_codon_unspliced_offsets(self): Offsets from start of unspliced pre-mRNA transcript of nucleotides in start codon. """ - return [self.offset(position) for position in self.start_codon_positions] + return [ + self.offset(position) for position in self.start_codon_positions + ] @memoized_property def stop_codon_unspliced_offsets(self): @@ -349,7 +360,9 @@ def stop_codon_unspliced_offsets(self): Offsets from start of unspliced pre-mRNA transcript of nucleotides in stop codon. """ - return [self.offset(position) for position in self.stop_codon_positions] + return [ + self.offset(position) for position in self.stop_codon_positions + ] def _contiguous_offsets(self, offsets): """ @@ -369,7 +382,8 @@ def start_codon_spliced_offsets(self): of nucleotides in start codon. """ offsets = [ - self.spliced_offset(position) for position in self.start_codon_positions + self.spliced_offset(position) + for position in self.start_codon_positions ] return self._contiguous_offsets(offsets) @@ -380,7 +394,8 @@ def stop_codon_spliced_offsets(self): of nucleotides in stop codon. """ offsets = [ - self.spliced_offset(position) for position in self.stop_codon_positions + self.spliced_offset(position) + for position in self.stop_codon_positions ] return self._contiguous_offsets(offsets) diff --git a/tests/common.py b/tests/common.py index 094b6a2..5012dcc 100644 --- a/tests/common.py +++ b/tests/common.py @@ -26,7 +26,9 @@ def test_ensembl_releases(*versions): ensembl_releases = major_releases else: if any(version > MAX_ENSEMBL_RELEASE for version in versions): - raise ValueError("Invalid ensembl release numbers: %s" % (versions,)) + raise ValueError( + "Invalid ensembl release numbers: %s" % (versions,) + ) ensembl_releases = [cached_release(version) for version in versions] def decorator(test_fn): diff --git a/tests/data.py b/tests/data.py index 60cd08a..eea2ed7 100644 --- a/tests/data.py +++ b/tests/data.py @@ -43,7 +43,9 @@ def data_path(name): ) # 3' UTR for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_UTR3 = "CTGCAAAGCCCTTGGAACACCCTGGATGCTGTTGAGGGCCAAGAGATCTGTGTGGCTCC" +CTNNBIP1_004_UTR3 = ( + "CTGCAAAGCCCTTGGAACACCCTGGATGCTGTTGAGGGCCAAGAGATCTGTGTGGCTCC" +) CTNNBIP1_004_locus = Locus("1", 9850659, 9878176, "-") @@ -131,7 +133,9 @@ def data_path(name): reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, - transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], + transcript_fasta_paths_or_urls=[ + MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH + ], protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], ) diff --git a/tests/test_download_cache.py b/tests/test_download_cache.py index 2bf5913..3194a01 100644 --- a/tests/test_download_cache.py +++ b/tests/test_download_cache.py @@ -1,5 +1,9 @@ from nose.tools import assert_raises, ok_ -from pyensembl.download_cache import DownloadCache, MissingLocalFile, MissingRemoteFile +from pyensembl.download_cache import ( + DownloadCache, + 
MissingLocalFile, + MissingRemoteFile, +) import os import tempfile diff --git a/tests/test_exon_id.py b/tests/test_exon_id.py index 18590f8..8bd1b08 100644 --- a/tests/test_exon_id.py +++ b/tests/test_exon_id.py @@ -122,7 +122,8 @@ def test_exon_ids_of_transcript_name(): len(exon_ids), ) assert all( - exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 for exon_id in exon_ids + exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 + for exon_id in exon_ids ) @@ -140,5 +141,6 @@ def exon_ids_of_transcript_id(): len(exon_ids), ) assert all( - exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 for exon_id in exon_ids + exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 + for exon_id in exon_ids ) diff --git a/tests/test_exon_object.py b/tests/test_exon_object.py index 9a77cde..40d2724 100644 --- a/tests/test_exon_object.py +++ b/tests/test_exon_object.py @@ -16,7 +16,9 @@ def test_exon_object_by_id(): up by ID in Ensembl 77. """ exon = ensembl.exon_by_id("ENSE00003464041") - assert exon.gene_name == "CTNNB1", "Unexpected gene name: %s" % exon.gene_name + assert exon.gene_name == "CTNNB1", ( + "Unexpected gene name: %s" % exon.gene_name + ) assert exon.contig == "3", exon.contig assert exon.strand == "+" assert exon.on_forward_strand @@ -32,7 +34,9 @@ def test_exon_object_by_id_on_negative_strand(): from CXCR3 when looked up by ID in Ensembl 77. """ exon = ensembl.exon_by_id("ENSE00001817013") - assert exon.gene_name == "CXCR3", "Unexpected gene name: %s" % exon.gene_name + assert exon.gene_name == "CXCR3", ( + "Unexpected gene name: %s" % exon.gene_name + ) assert exon.contig == "X", exon.contig assert exon.strand == "-" assert exon.on_backward_strand @@ -86,7 +90,9 @@ def test_exon_basic_properties_str(): def test_exon_basic_properties_hash(): exon = ensembl.exon_by_id("ENSE00001817013") - assert isinstance(hash(exon), int), "Hash function returns %s instead of int" % ( + assert isinstance( + hash(exon), int + ), "Hash function returns %s instead of int" % ( type( hash(exon), ) diff --git a/tests/test_gene_ids.py b/tests/test_gene_ids.py index 3f1420e..8666c89 100644 --- a/tests/test_gene_ids.py +++ b/tests/test_gene_ids.py @@ -22,7 +22,9 @@ def test_gene_ids_grch38_hla_a(): # Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884 ids = ensembl_grch38.gene_ids_at_locus(6, 29945884) expected = "ENSG00000206503" - assert ids == ["ENSG00000206503"], "Expected HLA-A, gene ID = %s, got: %s" % ( + assert ids == [ + "ENSG00000206503" + ], "Expected HLA-A, gene ID = %s, got: %s" % ( expected, ids, ) @@ -46,7 +48,9 @@ def test_gene_id_of_protein_id_release77(): def test_gene_id_of_invalid_name(): with assert_raises(Exception): - ensembl_grch38.gene_ids_of_gene_name("A wonderous pony sees through your soul") + ensembl_grch38.gene_ids_of_gene_name( + "A wonderous pony sees through your soul" + ) @test_ensembl_releases() diff --git a/tests/test_gene_names.py b/tests/test_gene_names.py index 626537b..ac2e892 100644 --- a/tests/test_gene_names.py +++ b/tests/test_gene_names.py @@ -67,6 +67,9 @@ def test_gene_name_of_HLA_gene_id(): gene_ids = grch38.gene_ids_of_gene_name("HLA-A") gene_names = [grch38.gene_name_of_gene_id(gene_id) for gene_id in gene_ids] unique_gene_names = list(set(gene_names)) - assert len(unique_gene_names) == 1, (len(unique_gene_names), unique_gene_names) + assert len(unique_gene_names) == 1, ( + len(unique_gene_names), + unique_gene_names, + ) gene_name = unique_gene_names[0] assert gene_name == "HLA-A", gene_name diff --git a/tests/test_gene_objects.py b/tests/test_gene_objects.py index 
63fe006..07f5f11 100644 --- a/tests/test_gene_objects.py +++ b/tests/test_gene_objects.py @@ -9,12 +9,16 @@ def test_TP53_gene_object_by_id(genome): # when we look up TP53 by its gene ID, we should get the # correct gene back gene = genome.gene_by_id(TP53_gene_id) - assert gene.name == "TP53", "Incorrect gene name %s for gene ID %s in %s" % ( + assert ( + gene.name == "TP53" + ), "Incorrect gene name %s for gene ID %s in %s" % ( gene.name, gene.id, genome, ) - assert gene.contig == "17", "Incorrect gene contig %s for gene ID %s in %s" % ( + assert ( + gene.contig == "17" + ), "Incorrect gene contig %s for gene ID %s in %s" % ( gene.contig, gene.id, genome, @@ -25,9 +29,13 @@ def test_TP53_gene_object_by_id(genome): def test_TP53_gene_object_by_name(genome): genes = genome.genes_by_name("TP53") # we should only have one TP53 gene (there aren't any copies) - assert len(genes) == 1, "Expected only one gene with name TP53, got %s" % (genes,) + assert len(genes) == 1, "Expected only one gene with name TP53, got %s" % ( + genes, + ) # make sure it has the correct gene ID - assert genes[0].id == TP53_gene_id, "Expected gene to have ID %s, got %s" % ( + assert ( + genes[0].id == TP53_gene_id + ), "Expected gene to have ID %s, got %s" % ( TP53_gene_id, genes[0].id, ) diff --git a/tests/test_id_length.py b/tests/test_id_length.py index cc61869..2d48877 100644 --- a/tests/test_id_length.py +++ b/tests/test_id_length.py @@ -10,7 +10,9 @@ def check_id_length(method_name): # only load chromosome Y to speed up tests idents = method(contig="Y") assert len(idents) > 0, "No values returned by %s" % method_name - assert all(len(ident) == 15 for ident in idents), "Invalid IDs for %s: %s" % ( + assert all( + len(ident) == 15 for ident in idents + ), "Invalid IDs for %s: %s" % ( method_name, [ident for ident in idents if len(ident) != 15], ) diff --git a/tests/test_missing_genome_sources.py b/tests/test_missing_genome_sources.py index 6069261..d03f856 100644 --- a/tests/test_missing_genome_sources.py +++ b/tests/test_missing_genome_sources.py @@ -22,12 +22,17 @@ def no_gtf_(cm): def no_transcript_(cm): - print("Testing for 'transcript' in %s : %s" % (type(cm.exception), cm.exception)) + print( + "Testing for 'transcript' in %s : %s" + % (type(cm.exception), cm.exception) + ) ok_("transcript" in str(cm.exception)) def no_protein_(cm): - print("Testing for 'protein' in %s : %s" % (type(cm.exception), cm.exception)) + print( + "Testing for 'protein' in %s : %s" % (type(cm.exception), cm.exception) + ) ok_("protein" in str(cm.exception)) @@ -35,7 +40,9 @@ def test_transcript_fasta_only(): genome = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", - transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], + transcript_fasta_paths_or_urls=[ + MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH + ], ) genome.index() @@ -66,7 +73,9 @@ def test_protein_fasta_only(): genome_only_proteins = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", - protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], + protein_fasta_paths_or_urls=[ + MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH + ], ) genome_only_proteins.index() @@ -107,7 +116,9 @@ def test_gtf_transcript_only(): reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, - transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], + transcript_fasta_paths_or_urls=[ + 
MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH + ], ) genome_gtf_with_cdna.index() @@ -126,7 +137,9 @@ def test_gtf_protein_only(): reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, - protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], + protein_fasta_paths_or_urls=[ + MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH + ], ) genome_gtf_with_proteins.index() diff --git a/tests/test_mouse.py b/tests/test_mouse.py index 5ec03b6..cf45b54 100644 --- a/tests/test_mouse.py +++ b/tests/test_mouse.py @@ -1,6 +1,9 @@ from nose.tools import eq_, with_setup -from .data import custom_mouse_genome_grcm38_subset, setup_init_custom_mouse_genome +from .data import ( + custom_mouse_genome_grcm38_subset, + setup_init_custom_mouse_genome, +) @with_setup(setup=setup_init_custom_mouse_genome) diff --git a/tests/test_release_versions.py b/tests/test_release_versions.py index 42761bd..5fa1288 100644 --- a/tests/test_release_versions.py +++ b/tests/test_release_versions.py @@ -26,7 +26,9 @@ def test_version_is_none(): def test_max_ensembl_release(): assert isinstance( MAX_ENSEMBL_RELEASE, int - ), "Unexpected type for MAX_ENSEMBL_RELEASE: %s" % (type(MAX_ENSEMBL_RELEASE),) + ), "Unexpected type for MAX_ENSEMBL_RELEASE: %s" % ( + type(MAX_ENSEMBL_RELEASE), + ) assert 83 <= MAX_ENSEMBL_RELEASE < 1000, ( "Unexpected value for MAX_ENSEMBL_RELEASE: %d" % MAX_ENSEMBL_RELEASE ) diff --git a/tests/test_search.py b/tests/test_search.py index 40930a4..6c90381 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -37,13 +37,17 @@ def test_find_nearest_BRAF_transcript(ensembl): for transcript in transcripts: # immediately before transcript result_before = find_nearest_locus( - start=transcript.start - 2, end=transcript.start - 1, loci=transcripts + start=transcript.start - 2, + end=transcript.start - 1, + loci=transcripts, ) eq_(result_before, (1, transcript)) # overlapping with transcript result_overlap = find_nearest_locus( - start=transcript.start - 2, end=transcript.start + 1, loci=transcripts + start=transcript.start - 2, + end=transcript.start + 1, + loci=transcripts, ) eq_(result_overlap, (0, transcript)) diff --git a/tests/test_sequence_data.py b/tests/test_sequence_data.py index 1d8b7fd..2675c14 100644 --- a/tests/test_sequence_data.py +++ b/tests/test_sequence_data.py @@ -18,8 +18,12 @@ def test_sequence_type(): with TemporaryDirectory() as tmpdir: seqs_dna = SequenceData([FASTA_PATH], cache_directory_path=tmpdir) seq = seqs_dna.get("ENSMUST00000138942") - assert seq is not None, "Failed to find sequence for ENSMUST00000138942" - assert isinstance(seq, str), "Wrong sequence type, expected %s but got %s" % ( + assert ( + seq is not None + ), "Failed to find sequence for ENSMUST00000138942" + assert isinstance( + seq, str + ), "Wrong sequence type, expected %s but got %s" % ( str, type(seq), ) @@ -35,10 +39,14 @@ def test_missing_sequence(): def test_clear_cache(): with TemporaryDirectory() as tmpdir: seqs = SequenceData([FASTA_PATH], cache_directory_path=tmpdir) - assert not seqs._fasta_dictionary, "Expected _fasta_dictionary to load lazily" + assert ( + not seqs._fasta_dictionary + ), "Expected _fasta_dictionary to load lazily" seqs._load_or_create_fasta_dictionary_pickle() - assert len(seqs._fasta_dictionary) > 0, "FASTA dictionary didn't get created" + assert ( + len(seqs._fasta_dictionary) > 0 + ), "FASTA dictionary didn't get created" seqs.clear_cache() assert ( @@ -51,4 +59,6 @@ def test_clear_cache(): 
seqs._load_or_create_fasta_dictionary_pickle() for pickle_path in seqs.fasta_dictionary_pickle_paths: - assert exists(pickle_path), "Cached pickle file should have been created" + assert exists( + pickle_path + ), "Cached pickle file should have been created" diff --git a/tests/test_serialization.py b/tests/test_serialization.py index 40d2c9f..3ac0983 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -106,7 +106,9 @@ def test_custom_genome_to_json(): @with_setup(setup=setup_init_custom_mouse_genome) def test_custom_genome_to_dict(): - reconstructed = Genome.from_dict(custom_mouse_genome_grcm38_subset.to_dict()) + reconstructed = Genome.from_dict( + custom_mouse_genome_grcm38_subset.to_dict() + ) eq_(custom_mouse_genome_grcm38_subset, reconstructed) @@ -127,4 +129,6 @@ def test_unique_memory_address_of_unpickled_genomes(ensembl_genome): unpickled = pickle.loads(pickle.dumps(ensembl_genome)) assert ( ensembl_genome is unpickled - ), "Expected same object for %s but got two different instances" % (unpickled,) + ), "Expected same object for %s but got two different instances" % ( + unpickled, + ) diff --git a/tests/test_shell.py b/tests/test_shell.py index dcc3b77..390bb2d 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -4,7 +4,9 @@ def test_genome_selection_grch38(): - args = parser.parse_args(["install", "--release", "100", "--species", "human"]) + args = parser.parse_args( + ["install", "--release", "100", "--species", "human"] + ) genomes = all_combinations_of_ensembl_genomes(args) assert len(genomes) == 1 genome = genomes[0] diff --git a/tests/test_timings.py b/tests/test_timings.py index b0fd8e1..80d8474 100644 --- a/tests/test_timings.py +++ b/tests/test_timings.py @@ -14,7 +14,9 @@ def make_repeat_lookup_fn(lookup_fn, n_positions): def repeat_lookup_fn(): for contig in contigs: - for position in [10**6 + i * 10**6 for i in range(n_positions)]: + for position in [ + 10**6 + i * 10**6 for i in range(n_positions) + ]: lookup_fn(contig, position) return repeat_lookup_fn @@ -28,9 +30,13 @@ def run_benchmark(lookup_fn, n_positions_per_contig=20, time_limit=60.0): repeat_lookup_fn = make_repeat_lookup_fn(lookup_fn, n_positions_per_contig) n_loci = n_positions_per_contig * len(contigs) name = lookup_fn.__name__ - average_time = benchmark(repeat_lookup_fn, name="%s for %d loci" % (name, n_loci)) + average_time = benchmark( + repeat_lookup_fn, name="%s for %d loci" % (name, n_loci) + ) print("-- %s : %0.4fs" % (name, average_time)) - assert average_time < time_limit, "%s took too long for %s loci: %0.4fs" % ( + assert ( + average_time < time_limit + ), "%s took too long for %s loci: %0.4fs" % ( name, n_loci, average_time, diff --git a/tests/test_transcript_ids.py b/tests/test_transcript_ids.py index f1e910f..9d067c5 100644 --- a/tests/test_transcript_ids.py +++ b/tests/test_transcript_ids.py @@ -32,7 +32,8 @@ def test_transcript_ids_ensembl_grch38_hla_a(): transcript_ids = grch38.transcript_ids_at_locus(6, 29941260, 29945884) for transcript_id in HLA_A_TRANSCRIPT_IDS: assert transcript_id in transcript_ids, ( - "Transcript %s of HLA-A not found overlapping locus" % transcript_id + "Transcript %s of HLA-A not found overlapping locus" + % transcript_id ) @@ -49,7 +50,9 @@ def test_transcript_ids_ensembl_grch38_hla_a(): def test_all_transcript_ids(ensembl): transcript_ids = set(ensembl.transcript_ids()) for transcript_id in KNOWN_TRANSCRIPT_IDS: - assert transcript_id in transcript_ids, "Missing transcript ID %s from %s" % ( + assert ( + transcript_id in 
transcript_ids + ), "Missing transcript ID %s from %s" % ( transcript_id, ensembl, ) diff --git a/tests/test_transcript_objects.py b/tests/test_transcript_objects.py index b8d5d58..ea83140 100644 --- a/tests/test_transcript_objects.py +++ b/tests/test_transcript_objects.py @@ -23,7 +23,9 @@ def test_transcript_start_codon(): test_transcript_start_codon : Check that fields Transcript (for transcript named CTNNBIP1-004) matches known values. """ - CTNNBIP1_004_transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) + CTNNBIP1_004_transcript = ensembl77.transcript_by_id( + CTNNBIP1_004_transcript_id + ) assert Locus.__eq__( CTNNBIP1_004_transcript, CTNNBIP1_004_locus @@ -61,7 +63,9 @@ def test_transcript_exons(): """ transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) exons = transcript.exons - assert isinstance(exons, list), "Expected list of Exon objects, got %s : %s" % ( + assert isinstance( + exons, list + ), "Expected list of Exon objects, got %s : %s" % ( exons, type(exons), ) @@ -69,7 +73,10 @@ def test_transcript_exons(): # CTTNBIP1-004 has 5 exons assert len(exons) == len( CTTNNIP1_004_exon_lengths - ), "Expected %d exons but got %d" % (len(CTTNNIP1_004_exon_lengths), len(exons)) + ), "Expected %d exons but got %d" % ( + len(CTTNNIP1_004_exon_lengths), + len(exons), + ) for i, exon in enumerate(exons): expected_id = CTTNNIP1_004_exon_ids[i] @@ -128,7 +135,13 @@ def test_sequence_parts(genome): combined_sequence_length, len(transcript), "Length 5' UTR(%dnt) + CDS(%dnt) + 3' UTR(%d) = %d, expected %d" - % (len(utr5), len(cds), len(utr3), combined_sequence_length, len(transcript)), + % ( + len(utr5), + len(cds), + len(utr3), + combined_sequence_length, + len(transcript), + ), ) eq_( combined_string, @@ -145,7 +158,8 @@ def test_transcript_utr5_sequence_CTNNIP1_004(): eq_( len(utr5), expected_utr5_length, - "Expected 5' UTR length %d, got %d" % (expected_utr5_length, len(utr5)), + "Expected 5' UTR length %d, got %d" + % (expected_utr5_length, len(utr5)), ) eq_(utr5, CTNNBIP1_004_UTR5) @@ -157,7 +171,8 @@ def test_transcript_utr3_sequence_CTNNIP1_004(): eq_( len(utr3), expected_utr3_length, - "Expected 3' UTR length %d, got %d" % (expected_utr3_length, len(utr3)), + "Expected 3' UTR length %d, got %d" + % (expected_utr3_length, len(utr3)), ) eq_(utr3, CTNNBIP1_004_UTR3) @@ -209,10 +224,11 @@ def test_transcript_gene_should_match_parent_gene(): @test_ensembl_releases() def test_BRCA1_201_has_protein_coding_biotype(genome): transcript = genome.transcripts_by_name("BRCA1-201")[0] - assert ( - transcript.is_protein_coding - ), "Expected BRCA1-201 transcript %s to have a protein coding biotype in %s" % ( - transcript, - genome, + assert transcript.is_protein_coding, ( + "Expected BRCA1-201 transcript %s to have a protein coding biotype in %s" + % ( + transcript, + genome, + ) ) eq_(transcript.biotype, "protein_coding") diff --git a/tests/test_ucsc_gtf.py b/tests/test_ucsc_gtf.py index 7cecde5..777ad21 100644 --- a/tests/test_ucsc_gtf.py +++ b/tests/test_ucsc_gtf.py @@ -15,7 +15,10 @@ def test_ucsc_gencode_gtf(): df = db._load_gtf_as_dataframe() exons = df[df["feature"] == "exon"] # expect 12 exons from the dataframe - assert len(exons) == 12, "Expected 12 exons, got %d: %s" % (len(exons), exons) + assert len(exons) == 12, "Expected 12 exons, got %d: %s" % ( + len(exons), + exons, + ) def test_ucsc_gencode_genome(): @@ -33,8 +36,13 @@ def test_ucsc_gencode_genome(): genome.index() genes = genome.genes() for gene in genes: - assert gene.id, "Gene with missing ID in 
%s" % (genome.gtf.dataframe(),) - assert len(genes) == 7, "Expected 7 genes, got %d: %s" % (len(genes), genes) + assert gene.id, "Gene with missing ID in %s" % ( + genome.gtf.dataframe(), + ) + assert len(genes) == 7, "Expected 7 genes, got %d: %s" % ( + len(genes), + genes, + ) transcripts = genome.transcripts() for transcript in transcripts: assert transcript.id, "Transcript with missing ID in %s" % ( @@ -67,7 +75,10 @@ def test_ucsc_refseq_gtf(): df = db._load_gtf_as_dataframe() exons = df[df["feature"] == "exon"] # expect 16 exons from the GTF - assert len(exons) == 16, "Expected 16 exons, got %d: %s" % (len(exons), exons) + assert len(exons) == 16, "Expected 16 exons, got %d: %s" % ( + len(exons), + exons, + ) def test_ucsc_refseq_genome(): @@ -88,7 +99,10 @@ def test_ucsc_refseq_genome(): assert gene.id, "Gene with missing ID in %s" % ( genome.db._load_gtf_as_dataframe(), ) - assert len(genes) == 2, "Expected 2 genes, got %d: %s" % (len(genes), genes) + assert len(genes) == 2, "Expected 2 genes, got %d: %s" % ( + len(genes), + genes, + ) transcripts = genome.transcripts() for transcript in transcripts: assert transcript.id, "Transcript with missing ID in %s" % ( From e586f2842951c31e8f78b908d66d47670fd0817c Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Tue, 9 Jan 2024 23:54:25 -0600 Subject: [PATCH 30/35] fix gene name error --- pyensembl/database.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pyensembl/database.py b/pyensembl/database.py index 562aa06..f91e82b 100644 --- a/pyensembl/database.py +++ b/pyensembl/database.py @@ -226,7 +226,22 @@ def create(self, overwrite=False): primary_keys = {} for feature in feature_names: - df_subset = df[df.feature == feature] + # Some speices such as soybean, do not have a gene_name and transcript_name + if ( + feature == "gene_name" + and "gene_id" in feature_names + and (df.feature == "gene_name").sum() == 0 + ): + alias_feature = "gene_id" + if ( + feature == "transcript_name" + and "transcript_id" in feature_names + and (df.feature == "transcript_name").sum() == 0 + ): + alias_feature = "transcript_id" + + alias_feature = feature + df_subset = df[df.feature == alias_feature] if len(df_subset) == 0: continue dataframes[feature] = df_subset From b2d2f62d858c2c10d3eee1bc3439c9a217efc525 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Wed, 10 Jan 2024 00:38:19 -0600 Subject: [PATCH 31/35] fix gene name error for soybean and some other species --- pyensembl/database.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/pyensembl/database.py b/pyensembl/database.py index f91e82b..7f1e3f3 100644 --- a/pyensembl/database.py +++ b/pyensembl/database.py @@ -211,6 +211,16 @@ def create(self, overwrite=False): usecols=self.restrict_gtf_columns, features=self.restrict_gtf_features, ) + # Some species such as soybean, do not have a gene_name and transcript_name + # but do have gene_id and transcript_id, use the as alias of names + if "gene_id" in df.columns and "gene_name" not in df.columns: + df["gene_name"] = df["gene_id"] + if ( + "transcript_id" in df.columns + and "transcript_name" not in df.columns + ): + df["transcript_name"] = df["transcript_id"] + all_index_groups = self._all_possible_indices(df.columns) if self.restrict_gtf_features: @@ -227,21 +237,7 @@ def create(self, overwrite=False): for feature in feature_names: # Some speices such as soybean, do not have a gene_name and transcript_name - if ( - feature == "gene_name" - and "gene_id" in feature_names - and 
(df.feature == "gene_name").sum() == 0 - ): - alias_feature = "gene_id" - if ( - feature == "transcript_name" - and "transcript_id" in feature_names - and (df.feature == "transcript_name").sum() == 0 - ): - alias_feature = "transcript_id" - - alias_feature = feature - df_subset = df[df.feature == alias_feature] + df_subset = df[df.feature == feature] if len(df_subset) == 0: continue dataframes[feature] = df_subset From 65b5d6d9ebc7e454871d4142a88c6abb7f8701b0 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Wed, 10 Jan 2024 00:39:09 -0600 Subject: [PATCH 32/35] fix gene name error for maize --- pyensembl/database.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyensembl/database.py b/pyensembl/database.py index 7f1e3f3..b5fcd99 100644 --- a/pyensembl/database.py +++ b/pyensembl/database.py @@ -211,7 +211,7 @@ def create(self, overwrite=False): usecols=self.restrict_gtf_columns, features=self.restrict_gtf_features, ) - # Some species such as soybean, do not have a gene_name and transcript_name + # Some species such as maize, do not have a gene_name and transcript_name # but do have gene_id and transcript_id, use the as alias of names if "gene_id" in df.columns and "gene_name" not in df.columns: df["gene_name"] = df["gene_id"] @@ -236,7 +236,6 @@ def create(self, overwrite=False): primary_keys = {} for feature in feature_names: - # Some speices such as soybean, do not have a gene_name and transcript_name df_subset = df[df.feature == feature] if len(df_subset) == 0: continue From eec71157d91c02ba257c8bba755057c8ad06829d Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Wed, 10 Jan 2024 02:37:08 -0600 Subject: [PATCH 33/35] suport mRNA type --- pyensembl/locus_with_genome.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pyensembl/locus_with_genome.py b/pyensembl/locus_with_genome.py index 33dd38d..e93ef24 100644 --- a/pyensembl/locus_with_genome.py +++ b/pyensembl/locus_with_genome.py @@ -16,8 +16,8 @@ class LocusWithGenome(Locus): """ - Common base class for Gene and Transcript to avoid copying - their shared logic. + Common base class for Gene and Transcript to avoid copying their shared + logic. """ def __init__(self, contig, start, end, strand, biotype, genome): @@ -39,16 +39,17 @@ def to_dict(self): @property def is_protein_coding(self): """ - We're not counting immunoglobulin-like genes from the T-cell receptor or - or antibodies since they occur in fragments that must be recombined. - It might be worth consider counting non-sense mediated decay and - non-stop decay since variants in these could potentially make a - functional protein. To read more about the biotypes used in Ensembl: - http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html - http://www.gencodegenes.org/gencode_biotypes.html - - For now let's stick with the simple category of 'protein_coding', which - means that there is an open reading frame in this gene/transcript - whose successful transcription has been observed. + We're not counting immunoglobulin-like genes from the T-cell receptor + or or antibodies since they occur in fragments that must be recombined. + It might be worth consider counting non-sense mediated decay and non- + stop decay since variants in these could potentially make a functional + protein. To read more about the biotypes used in Ensembl: + http://vega.sanger.ac.uk/info/about/gene_and_transcript_types.html + http://www.gencodegenes.org/gencode_biotypes.html. 
+ + For now let's stick with the simple category of + 'protein_coding', which means that there is an open reading + frame in this gene/transcript whose successful transcription has + been observed. """ - return self.biotype == "protein_coding" + return self.biotype in ["protein_coding", "mRNA"] From 303ada4d7dab5402b3de001f4d1d421aef080b3c Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Wed, 10 Jan 2024 02:48:49 -0600 Subject: [PATCH 34/35] suport mRNA type --- pyensembl/locus_with_genome.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyensembl/locus_with_genome.py b/pyensembl/locus_with_genome.py index e93ef24..338a222 100644 --- a/pyensembl/locus_with_genome.py +++ b/pyensembl/locus_with_genome.py @@ -52,4 +52,4 @@ def is_protein_coding(self): frame in this gene/transcript whose successful transcription has been observed. """ - return self.biotype in ["protein_coding", "mRNA"] + return self.biotype in "protein_coding" From b4928358dd5a89dc276ba8ce60065b43814ae0a1 Mon Sep 17 00:00:00 2001 From: Chang Ye Date: Thu, 11 Jan 2024 14:47:01 -0600 Subject: [PATCH 35/35] fix conflict --- tests/common.py | 40 -------- tests/data.py | 113 ++++++++++------------ tests/test_contigs.py | 1 - tests/test_download_cache.py | 32 +++--- tests/test_ensembl_gtf.py | 4 - tests/test_ensembl_object_properties.py | 1 - tests/test_exon_id.py | 123 ------------------------ tests/test_exon_object.py | 21 +--- tests/test_gene_ids.py | 26 ----- tests/test_gene_names.py | 34 +------ tests/test_gene_objects.py | 42 -------- tests/test_id_length.py | 20 ++-- tests/test_locus.py | 5 - tests/test_missing_genome_sources.py | 51 ---------- tests/test_mouse.py | 10 -- tests/test_release_versions.py | 22 ----- tests/test_search.py | 25 ----- tests/test_sequence_data.py | 26 ----- tests/test_serialization.py | 17 ---- tests/test_shell.py | 4 +- tests/test_timings.py | 16 --- tests/test_transcript_ids.py | 13 --- tests/test_transcript_objects.py | 60 ------------ tests/test_ucsc_gtf.py | 37 +------ 24 files changed, 82 insertions(+), 661 deletions(-) diff --git a/tests/common.py b/tests/common.py index 17a86e8..ea1ecff 100644 --- a/tests/common.py +++ b/tests/common.py @@ -1,18 +1,9 @@ import functools -<<<<<<< HEAD -from pyensembl import ( - genome_for_reference_name, - cached_release, - MAX_ENSEMBL_RELEASE, -) -from nose.tools import nottest -======= from pyensembl import genome_for_reference_name, cached_release import pytest ->>>>>>> upstream/master grch37 = genome_for_reference_name("GRCh37") grch38 = genome_for_reference_name("GRCh38") @@ -22,33 +13,6 @@ contigs = [str(c) for c in range(1, 23)] + ["X", "Y", "M"] -<<<<<<< HEAD -@nottest -def test_ensembl_releases(*versions): - """ - Run a unit test which takes an EnsemblRelease as an argument - for multiple releases (most recent for each reference genome) - """ - - if len(versions) == 0: - ensembl_releases = major_releases - else: - if any(version > MAX_ENSEMBL_RELEASE for version in versions): - raise ValueError( - "Invalid ensembl release numbers: %s" % (versions,) - ) - ensembl_releases = [cached_release(version) for version in versions] - - def decorator(test_fn): - @functools.wraps(test_fn) - def new_test_fn(): - for ensembl in ensembl_releases: - test_fn(ensembl) - - return new_test_fn - - return decorator -======= def run_multiple_genomes(*versions): if len(versions) == 1 and callable(versions[0]): return pytest.mark.parametrize("genome", major_releases)(versions[0]) @@ -57,7 +21,6 @@ def run_multiple_genomes(*versions): else: genomes = 
[cached_release(v) for v in versions] return lambda fn: pytest.mark.parametrize("genome", genomes)(fn) ->>>>>>> upstream/master # TemporaryDirectory only got added to Python in version 3.2 @@ -81,8 +44,6 @@ def __exit__(self, type, value, traceback): rmtree(self.name) # don't suppress exceptions return False -<<<<<<< HEAD -======= def eq_(x, y, msg=None): @@ -125,4 +86,3 @@ def lte_(x, y, msg=None): assert x <= y else: assert x <= y, msg ->>>>>>> upstream/master diff --git a/tests/data.py b/tests/data.py index eea2ed7..0b41369 100644 --- a/tests/data.py +++ b/tests/data.py @@ -21,31 +21,25 @@ def data_path(name): CTNNBIP1_004_transcript_id = "ENST00000377256" # coding sequence for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_CDS = "".join( - [ - "ATG", - "AACCGCGAGGGAGCTCCCGGGAAGAGTCCGGAG", - "GAGATGTACATTCAGCAGAAGGTCCGAGTGCTGCTCATGCTGCGGAAGATGGGATCAAAC", - "CTGACAGCCAGCGAGGAGGAGTTCCTGCGCACCTATGCAGGGGTGGTCAACAGCCAGCTC", - "AGCCAGCTGCCTCCGCACTCCATCGACCAGG", - "GTGCAGAGGACGTGGTGATGGCGTTTTCCAGGTCGGAGACGGAAGACCGGAGGCAG", - "TAG", - ] -) +CTNNBIP1_004_CDS = "".join([ + "ATG", + "AACCGCGAGGGAGCTCCCGGGAAGAGTCCGGAG", + "GAGATGTACATTCAGCAGAAGGTCCGAGTGCTGCTCATGCTGCGGAAGATGGGATCAAAC", + "CTGACAGCCAGCGAGGAGGAGTTCCTGCGCACCTATGCAGGGGTGGTCAACAGCCAGCTC", + "AGCCAGCTGCCTCCGCACTCCATCGACCAGG", + "GTGCAGAGGACGTGGTGATGGCGTTTTCCAGGTCGGAGACGGAAGACCGGAGGCAG", + "TAG" +]) # 5' UTR for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_UTR5 = "".join( - [ - "TGTGGGTGCAGGTTTCCTGGGCTTGCCAGACACACAGGGCGGCACCTTCCTACTTCTGCC", - "CAGCCACAGCCCTCCCCTCACAGTTGAGCACCTGTTTGCCTGAAGTTAATTTCCAGAAGC", - "AGGAGTCCCCAGAGCCAGGCAGGGGG", - ] -) +CTNNBIP1_004_UTR5 = "".join([ + "TGTGGGTGCAGGTTTCCTGGGCTTGCCAGACACACAGGGCGGCACCTTCCTACTTCTGCC", + "CAGCCACAGCCCTCCCCTCACAGTTGAGCACCTGTTTGCCTGAAGTTAATTTCCAGAAGC", + "AGGAGTCCCCAGAGCCAGGCAGGGGG"]) # 3' UTR for beta-catenin interacting protein (CTNNBIP1-004) -CTNNBIP1_004_UTR3 = ( +CTNNBIP1_004_UTR3 = \ "CTGCAAAGCCCTTGGAACACCCTGGATGCTGTTGAGGGCCAAGAGATCTGTGTGGCTCC" -) CTNNBIP1_004_locus = Locus("1", 9850659, 9878176, "-") @@ -53,14 +47,20 @@ def data_path(name): # http://useast.ensembl.org/Homo_sapiens/Transcript/Exons?g=ENSG00000178585; # r=1:9850659-9878176;redirect=no;t=ENST00000377256 CTTNNIP1_004_exon_ids = [ - "ENSE00001473268", - "ENSE00001643659", - "ENSE00001600669", - "ENSE00001267940", - "ENSE00001473265", + 'ENSE00001473268', + 'ENSE00001643659', + 'ENSE00001600669', + 'ENSE00001267940', + 'ENSE00001473265', ] -CTTNNIP1_004_exon_lengths = [37, 85, 120, 91, 118] +CTTNNIP1_004_exon_lengths = [ + 37, + 85, + 120, + 91, + 118 +] # @@ -72,28 +72,26 @@ def data_path(name): EGFR_001_transcript_id = "ENST00000275493" EGFR_001_ccds_id = "CCDS5514" EGFR_001_protein_id = "ENSP00000275493" -EGFR_001_protein_sequence = "".join( - [ - "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYV", - "QRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNL", - "QEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKL", - "TKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVN", - "PEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLS", - "INATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAF", - "ENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKI", - "ISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHP", - "ECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTG", - 
"PGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPN", - "QALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVD", - "NPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAA", - "RNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTF", - "GSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLV", - "IQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACI", - "DRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPS" - "RDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGS" - "TAENAEYLRVAPQSSEFIGA", - ] -) +EGFR_001_protein_sequence = "".join([ + "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYV", + "QRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNL", + "QEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKL", + "TKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVN", + "PEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLS", + "INATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAF", + "ENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKI", + "ISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHP", + "ECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTG", + "PGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPN", + "QALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVD", + "NPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAA", + "RNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTF", + "GSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLV", + "IQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACI", + "DRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPS" + "RDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGS" + "TAENAEYLRVAPQSSEFIGA" +]) # GTF cropped from ftp://ftp.ensembl.org/pub/release-81/gtf/mus_musculus/ @@ -116,28 +114,21 @@ def data_path(name): # http://useast.ensembl.org/Mus_musculus/Gene/Summary?db=core;g=ENSMUSG00000017167 MOUSE_ENSMUSG00000017167_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.gtf" -) + "mouse.ensembl.81.partial.ENSMUSG00000017167.gtf") MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.fa" -) + "mouse.ensembl.81.partial.ENSMUSG00000017167.fa") MOUSE_ENSMUSG00000088969_NCRNA_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa" -) + "mouse.ensembl.81.partial.ncrna.ENSMUSG00000017167.fa") MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH = data_path( - "mouse.ensembl.81.partial.ENSMUSG00000017167.pep" -) + "mouse.ensembl.81.partial.ENSMUSG00000017167.pep") custom_mouse_genome_grcm38_subset = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, - transcript_fasta_paths_or_urls=[ - MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH - ], - protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], -) + transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], + protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH]) def setup_init_custom_mouse_genome(): diff --git a/tests/test_contigs.py b/tests/test_contigs.py index b4eb702..1101061 100644 --- a/tests/test_contigs.py +++ b/tests/test_contigs.py @@ -2,7 +2,6 @@ grch38 = 
genome_for_reference_name("GRCh38") - def test_contig_names(): contig_names = set(grch38.contigs()) for chrom in list(range(1, 23)) + ["X", "Y", "MT"]: diff --git a/tests/test_download_cache.py b/tests/test_download_cache.py index 3194a01..03c7da6 100644 --- a/tests/test_download_cache.py +++ b/tests/test_download_cache.py @@ -2,7 +2,7 @@ from pyensembl.download_cache import ( DownloadCache, MissingLocalFile, - MissingRemoteFile, + MissingRemoteFile ) import os @@ -13,27 +13,21 @@ download_cache = DownloadCache( reference_name="__test_reference", annotation_name="__test_annotation", - copy_local_files_to_cache=False, -) - + copy_local_files_to_cache=False) def test_download_cache_missing_local_file(): # clear the cache download_cache.delete_cache_directory() with assert_raises(MissingLocalFile): download_cache.download_or_copy_if_necessary( - path_or_url="test_file_doesn_not_exist.file" - ) - + path_or_url="test_file_doesn_not_exist.file") def test_download_cache_missing_remote_file(): # clear the cache download_cache.delete_cache_directory() with assert_raises(MissingRemoteFile): download_cache.download_or_copy_if_necessary( - path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL" - ) - + path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL") def test_download_cache_custom_location(): test_file = "refseq.ucsc.small.gtf" @@ -42,27 +36,29 @@ def test_download_cache_custom_location(): print("DIR: %s" % tmp_dir) assert tmp_dir is not None - os.environ["PYENSEMBL_CACHE_DIR"] = tmp_dir + os.environ['PYENSEMBL_CACHE_DIR'] = tmp_dir # We need another instance of DownloadCache # that copies files over to cache folder download_cache = DownloadCache( reference_name="test_reference", annotation_name="test_annotation", - copy_local_files_to_cache=True, - ) + copy_local_files_to_cache=True) # clean up download_cache.delete_cache_directory() download_cache.download_or_copy_if_necessary( - download_if_missing=True, path_or_url=data_path(test_file) - ) + download_if_missing=True, + path_or_url=data_path(test_file)) full_path = os.path.join( - tmp_dir, "pyensembl", "test_reference", "test_annotation", test_file - ) + tmp_dir, + "pyensembl", + "test_reference", + "test_annotation", + test_file) print("FULL PATH: %s" % full_path) assert len(full_path) > 0 ok_(os.path.exists(full_path)) - del os.environ["PYENSEMBL_CACHE_DIR"] + del os.environ['PYENSEMBL_CACHE_DIR'] diff --git a/tests/test_ensembl_gtf.py b/tests/test_ensembl_gtf.py index 6ee741b..14330e2 100644 --- a/tests/test_ensembl_gtf.py +++ b/tests/test_ensembl_gtf.py @@ -5,11 +5,7 @@ from .common import run_multiple_genomes -<<<<<<< HEAD -@test_ensembl_releases() -======= @run_multiple_genomes() ->>>>>>> upstream/master def gtf_path_endswith_gtf_gz(ensembl): path = ensembl.gtf.gtf_path assert exists(path) diff --git a/tests/test_ensembl_object_properties.py b/tests/test_ensembl_object_properties.py index b3c4582..ff90dcf 100644 --- a/tests/test_ensembl_object_properties.py +++ b/tests/test_ensembl_object_properties.py @@ -8,7 +8,6 @@ from nose.tools import eq_ from pyensembl import EnsemblRelease, MAX_ENSEMBL_RELEASE - def test_human_reference_name(): eq_(EnsemblRelease(release=54).reference_name, "NCBI36") eq_(EnsemblRelease(release=74).reference_name, "GRCh37") diff --git a/tests/test_exon_id.py b/tests/test_exon_id.py index 981cdab..ceb145f 100644 --- a/tests/test_exon_id.py +++ b/tests/test_exon_id.py @@ -10,61 +10,6 @@ # all exons associated with TP53 gene in Ensembl release 77 TP53_EXON_IDS_RELEASE_77 = [ -<<<<<<< HEAD - "ENSE00002337729", - "ENSE00002419584", - 
"ENSE00003625790", - "ENSE00003518480", - "ENSE00003723991", - "ENSE00003712342", - "ENSE00001657961", - "ENSE00003725258", - "ENSE00003740946", - "ENSE00002204316", - "ENSE00002064269", - "ENSE00003750554", - "ENSE00003634848", - "ENSE00003492844", - "ENSE00003735852", - "ENSE00003545950", - "ENSE00003605891", - "ENSE00002051192", - "ENSE00002084733", - "ENSE00003726882", - "ENSE00001146308", - "ENSE00002667911", - "ENSE00003752869", - "ENSE00003739898", - "ENSE00003753508", - "ENSE00002034209", - "ENSE00002030826", - "ENSE00001596491", - "ENSE00002037735", - "ENSE00003736616", - "ENSE00002672443", - "ENSE00002226620", - "ENSE00003715195", - "ENSE00003750794", - "ENSE00003745267", - "ENSE00003746220", - "ENSE00003656695", - "ENSE00003669712", - "ENSE00002051873", - "ENSE00002048269", - "ENSE00002670535", - "ENSE00002677565", - "ENSE00003532881", - "ENSE00003520683", - "ENSE00002076714", - "ENSE00002062958", - "ENSE00002073243", - "ENSE00003670707", - "ENSE00002065802", - "ENSE00002362269", -] - - -======= 'ENSE00002337729', 'ENSE00002419584', 'ENSE00003625790', 'ENSE00003518480', 'ENSE00003723991', 'ENSE00003712342', @@ -92,25 +37,11 @@ 'ENSE00002065802', 'ENSE00002362269' ] ->>>>>>> upstream/master def test_exon_ids_of_gene_id(): """ test_exon_ids_of_gene_id: Ensure that gene_id ENSG00000141510 (name=TP53), has all the same exon IDs found on the Ensembl website. """ -<<<<<<< HEAD - exon_ids = ensembl.exon_ids_of_gene_id("ENSG00000141510") - assert len(exon_ids) == len( - TP53_EXON_IDS_RELEASE_77 - ), "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( - len(TP53_EXON_IDS_RELEASE_77), - len(exon_ids), - len(set(exon_ids)), - ) - assert all(exon_id in TP53_EXON_IDS_RELEASE_77 for exon_id in exon_ids) - - -======= exon_ids = ensembl.exon_ids_of_gene_id('ENSG00000141510') assert len(exon_ids) == len(TP53_EXON_IDS_RELEASE_77), \ "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( @@ -119,38 +50,12 @@ def test_exon_ids_of_gene_id(): len(set(exon_ids))) assert all(exon_id in TP53_EXON_IDS_RELEASE_77 for exon_id in exon_ids) ->>>>>>> upstream/master def test_exon_ids_of_gene_name(): """ test_exon_ids_of_gene_name: Ensure that TP53 has the same exon IDs found on the Ensembl website. 
""" exon_ids = ensembl.exon_ids_of_gene_name("TP53") -<<<<<<< HEAD - assert len(exon_ids) == len( - TP53_EXON_IDS_RELEASE_77 - ), "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( - len(TP53_EXON_IDS_RELEASE_77), - len(exon_ids), - len(set(exon_ids)), - ) - assert all(exon_id in TP53_EXON_IDS_RELEASE_77 for exon_id in exon_ids) - - -# Exon IDs of transcript TP53-026 -TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 = [ - "ENSE00002064269", - "ENSE00003723991", - "ENSE00003712342", - "ENSE00003725258", - "ENSE00003740946", - "ENSE00003750554", - "ENSE00003634848", - "ENSE00003492844", -] - - -======= assert len(exon_ids) == len(TP53_EXON_IDS_RELEASE_77), \ "Wrong number of exons, expected %d but got %d (n_distinct=%d)" % ( len(TP53_EXON_IDS_RELEASE_77), @@ -170,7 +75,6 @@ def test_exon_ids_of_gene_name(): 'ENSE00003492844' ] ->>>>>>> upstream/master def test_exon_ids_of_transcript_name(): """ test_exon_ids_of_transcript_name : Look up exon IDs of transcript TP53-026 @@ -178,19 +82,6 @@ def test_exon_ids_of_transcript_name(): for release 77 """ exon_ids = ensembl.exon_ids_of_transcript_name("TP53-026") -<<<<<<< HEAD - assert len(exon_ids) == len( - TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 - ), "Expected %d exons, got %d" % ( - len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), - len(exon_ids), - ) - assert all( - exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 - for exon_id in exon_ids - ) - -======= assert len(exon_ids) == len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), \ "Expected %d exons, got %d" % ( len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), @@ -198,7 +89,6 @@ def test_exon_ids_of_transcript_name(): assert all( exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 for exon_id in exon_ids) ->>>>>>> upstream/master def exon_ids_of_transcript_id(): """ @@ -207,18 +97,6 @@ def exon_ids_of_transcript_id(): what we find on the Ensembl website. """ exon_ids = ensembl.exon_ids_of_transcript_id("ENST00000610623") -<<<<<<< HEAD - assert len(exon_ids) == len( - TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 - ), "Expected %d exons, got %d" % ( - len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), - len(exon_ids), - ) - assert all( - exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 - for exon_id in exon_ids - ) -======= assert len(exon_ids) == len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), \ "Expected %d exons, got %d" % ( len(TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77), @@ -226,4 +104,3 @@ def exon_ids_of_transcript_id(): assert all( exon_id in TP53_TRANSCRIPT_26_EXON_IDS_RELEASE_77 for exon_id in exon_ids) ->>>>>>> upstream/master diff --git a/tests/test_exon_object.py b/tests/test_exon_object.py index 40d2724..4587284 100644 --- a/tests/test_exon_object.py +++ b/tests/test_exon_object.py @@ -9,16 +9,14 @@ ensembl = cached_release(77) - def test_exon_object_by_id(): """ test_exon_object_by_id : check properties of exon 4 of CTNNB1 when looked up by ID in Ensembl 77. """ exon = ensembl.exon_by_id("ENSE00003464041") - assert exon.gene_name == "CTNNB1", ( + assert exon.gene_name == "CTNNB1", \ "Unexpected gene name: %s" % exon.gene_name - ) assert exon.contig == "3", exon.contig assert exon.strand == "+" assert exon.on_forward_strand @@ -27,16 +25,14 @@ def test_exon_object_by_id(): assert exon.end == 41224753, "Unexpected exon end: %s" % exon.end assert exon.length == len(exon) == 228 - def test_exon_object_by_id_on_negative_strand(): """ test_exon_object_by_id_on_negative_strand : check properties of exon 1 from CXCR3 when looked up by ID in Ensembl 77. 
""" exon = ensembl.exon_by_id("ENSE00001817013") - assert exon.gene_name == "CXCR3", ( + assert exon.gene_name == "CXCR3", \ "Unexpected gene name: %s" % exon.gene_name - ) assert exon.contig == "X", exon.contig assert exon.strand == "-" assert exon.on_backward_strand @@ -61,7 +57,6 @@ def test_exon_object_at_locus(): assert exon.start <= 41224526, "Unexpected exon start: %s" % exon.start assert exon.end >= 41224526, "Unexpected exon end: %s" % exon.end - def test_exon_object_at_locus_on_negative_strand(): """ test_exon_object_at_locus : check properties of exon 1 of CXCR3 when looked @@ -77,7 +72,6 @@ def test_exon_object_at_locus_on_negative_strand(): assert exon.start <= 71618517, "Unexpected exon start: %s" % exon.start assert exon.end >= 71618517, "Unexpected exon end: %s" % exon.end - def test_exon_basic_properties_str(): exon = ensembl.exon_by_id("ENSE00001817013") assert isinstance(str(exon), str) @@ -87,16 +81,11 @@ def test_exon_basic_properties_str(): # change this test assert str(exon) == repr(exon), "%s != %s" % (str(exon), repr(exon)) - def test_exon_basic_properties_hash(): exon = ensembl.exon_by_id("ENSE00001817013") - assert isinstance( - hash(exon), int - ), "Hash function returns %s instead of int" % ( - type( - hash(exon), - ) - ) + assert isinstance(hash(exon), int), \ + "Hash function returns %s instead of int" % ( + type(hash(exon),)) assert hash(exon) == hash(exon), "Hash function is non-deterministic!" other_exon = ensembl.exon_by_id("ENSE00003464041") assert exon != other_exon diff --git a/tests/test_gene_ids.py b/tests/test_gene_ids.py index 612b3cf..436eaef 100644 --- a/tests/test_gene_ids.py +++ b/tests/test_gene_ids.py @@ -22,13 +22,7 @@ def test_gene_ids_grch38_hla_a(): # Summary?db=core;g=ENSG00000206503;r=6:29941260-29945884 ids = ensembl_grch38.gene_ids_at_locus(6, 29945884) expected = "ENSG00000206503" -<<<<<<< HEAD - assert ids == [ - "ENSG00000206503" - ], "Expected HLA-A, gene ID = %s, got: %s" % ( -======= assert ids == ["ENSG00000206503"], "Expected HLA-A, gene ID = %s, got: %s" % ( ->>>>>>> upstream/master expected, ids, ) @@ -47,24 +41,12 @@ def test_gene_ids_of_gene_name_hla_grch38(): def test_gene_id_of_protein_id_release77(): gene_id = ensembl77.gene_id_of_protein_id("ENSP00000485677") -<<<<<<< HEAD - ok_("ENSG00000279634", gene_id) - - -def test_gene_id_of_invalid_name(): - with assert_raises(Exception): - ensembl_grch38.gene_ids_of_gene_name( - "A wonderous pony sees through your soul" - ) - -======= eq_("ENSG00000279634", gene_id) def test_gene_id_of_invalid_name(): with raises(Exception): ensembl_grch38.gene_ids_of_gene_name("A wonderous pony sees through your soul") ->>>>>>> upstream/master @run_multiple_genomes() @@ -76,11 +58,7 @@ def test_gene_ids_on_contig(genome): tp53 in gene_ids_chr17 ), "Missing %s from %s on chr17, example IDs: %s (total = %d)" % ( tp53, -<<<<<<< HEAD - ensembl, -======= genome, ->>>>>>> upstream/master gene_ids_chr17[:5], len(gene_ids_chr17), ) @@ -92,11 +70,7 @@ def test_gene_ids_on_contig(genome): smad4 in gene_ids_chr18 ), "Missing %s from %s on chr18, example result: %s (total = %d)" % ( smad4, -<<<<<<< HEAD - ensembl, -======= genome, ->>>>>>> upstream/master gene_ids_chr18[:5], len(gene_ids_chr18), ) diff --git a/tests/test_gene_names.py b/tests/test_gene_names.py index f343bfc..61f4480 100644 --- a/tests/test_gene_names.py +++ b/tests/test_gene_names.py @@ -18,13 +18,8 @@ ] -<<<<<<< HEAD -@test_ensembl_releases() -def test_all_gene_names(ensembl): -======= @run_multiple_genomes() def 
test_all_gene_names(genome): ->>>>>>> upstream/master """ test_all_gene_names : Make sure some known gene names such as SMAD4, TP53, ERBB2, &c @@ -34,11 +29,7 @@ def test_all_gene_names(genome): for gene_name in KNOWN_GENE_NAMES: assert gene_name in gene_names, "Missing gene name %s from %s" % ( gene_name, -<<<<<<< HEAD - ensembl, -======= genome, ->>>>>>> upstream/master ) @@ -50,25 +41,6 @@ def test_gene_names_at_locus_grch38_hla_a(): names = grch38.gene_names_at_locus(6, 29945884) assert names == ["HLA-A"], "Expected gene name HLA-A, got: %s" % (names,) -<<<<<<< HEAD - -@test_ensembl_releases() -def test_gene_names_on_contig(ensembl): - gene_names_chr17 = ensembl.gene_names(17) - assert ( - "TP53" in gene_names_chr17 - ), "No TP53 in gene names on chr17 of %s, gene names: %s ... (%d)" % ( - ensembl, - list(gene_names_chr17[:4]), - len(gene_names_chr17), - ) - - gene_names_chr18 = ensembl.gene_names(18) - assert ( - "SMAD4" in gene_names_chr18 - ), "No SMAD4 in gene names on chr18 of %s, gene names: %s ... (%d)" % ( - ensembl, -======= @run_multiple_genomes() def test_gene_names_on_contig(genome): @@ -86,7 +58,6 @@ def test_gene_names_on_contig(genome): "SMAD4" in gene_names_chr18 ), "No SMAD4 in gene names on chr18 of %s, gene names: %s ... (%d)" % ( genome, ->>>>>>> upstream/master list(gene_names_chr18[:4]), len(gene_names_chr18), ) @@ -96,9 +67,6 @@ def test_gene_name_of_HLA_gene_id(): gene_ids = grch38.gene_ids_of_gene_name("HLA-A") gene_names = [grch38.gene_name_of_gene_id(gene_id) for gene_id in gene_ids] unique_gene_names = list(set(gene_names)) - assert len(unique_gene_names) == 1, ( - len(unique_gene_names), - unique_gene_names, - ) + assert len(unique_gene_names) == 1, (len(unique_gene_names), unique_gene_names) gene_name = unique_gene_names[0] assert gene_name == "HLA-A", gene_name diff --git a/tests/test_gene_objects.py b/tests/test_gene_objects.py index 65a078f..e66f639 100644 --- a/tests/test_gene_objects.py +++ b/tests/test_gene_objects.py @@ -4,67 +4,33 @@ from .data import TP53_gene_id -<<<<<<< HEAD -@test_ensembl_releases() -======= @run_multiple_genomes() ->>>>>>> upstream/master def test_TP53_gene_object_by_id(genome): # when we look up TP53 by its gene ID, we should get the # correct gene back gene = genome.gene_by_id(TP53_gene_id) -<<<<<<< HEAD - assert ( - gene.name == "TP53" - ), "Incorrect gene name %s for gene ID %s in %s" % ( -======= assert gene.name == "TP53", "Incorrect gene name %s for gene ID %s in %s" % ( ->>>>>>> upstream/master gene.name, gene.id, genome, ) -<<<<<<< HEAD - assert ( - gene.contig == "17" - ), "Incorrect gene contig %s for gene ID %s in %s" % ( -======= assert gene.contig == "17", "Incorrect gene contig %s for gene ID %s in %s" % ( ->>>>>>> upstream/master gene.contig, gene.id, genome, ) -<<<<<<< HEAD - -======= ->>>>>>> upstream/master @run_multiple_genomes() def test_TP53_gene_object_by_name(genome): genes = genome.genes_by_name("TP53") # we should only have one TP53 gene (there aren't any copies) -<<<<<<< HEAD - assert len(genes) == 1, "Expected only one gene with name TP53, got %s" % ( - genes, - ) - # make sure it has the correct gene ID - assert ( - genes[0].id == TP53_gene_id - ), "Expected gene to have ID %s, got %s" % ( - TP53_gene_id, - genes[0].id, - ) - -======= assert len(genes) == 1, "Expected only one gene with name TP53, got %s" % (genes,) # make sure it has the correct gene ID assert genes[0].id == TP53_gene_id, "Expected gene to have ID %s, got %s" % ( TP53_gene_id, genes[0].id, ) ->>>>>>> upstream/master 
@run_multiple_genomes() @@ -77,11 +43,7 @@ def test_equal_genes(genome): assert gene1 == gene2 -<<<<<<< HEAD -@test_ensembl_releases() -======= @run_multiple_genomes() ->>>>>>> upstream/master def test_not_equal_genes(genome): gene1 = genome.genes_by_name("MUC1")[0] gene2 = genome.genes_by_name("BRCA1")[0] @@ -89,11 +51,7 @@ def test_not_equal_genes(genome): assert gene1 != gene2 -<<<<<<< HEAD -@test_ensembl_releases() -======= @run_multiple_genomes() ->>>>>>> upstream/master def test_BRCA1_protein_coding_biotype(genome): gene = genome.genes_by_name("BRCA1")[0] assert gene.is_protein_coding diff --git a/tests/test_id_length.py b/tests/test_id_length.py index 2d48877..7371cd4 100644 --- a/tests/test_id_length.py +++ b/tests/test_id_length.py @@ -2,7 +2,6 @@ from nose.tools import nottest - @nottest def check_id_length(method_name): for release in major_releases: @@ -10,21 +9,16 @@ def check_id_length(method_name): # only load chromosome Y to speed up tests idents = method(contig="Y") assert len(idents) > 0, "No values returned by %s" % method_name - assert all( - len(ident) == 15 for ident in idents - ), "Invalid IDs for %s: %s" % ( - method_name, - [ident for ident in idents if len(ident) != 15], - ) - + assert all(len(ident) == 15 for ident in idents), \ + "Invalid IDs for %s: %s" % ( + method_name, + [ident for ident in idents if len(ident) != 15]) def test_gene_id_length(): - check_id_length("gene_ids") - + check_id_length('gene_ids') def test_transcript_id_length(): - check_id_length("transcript_ids") - + check_id_length('transcript_ids') def test_protein_id_length(): - check_id_length("protein_ids") + check_id_length('protein_ids') diff --git a/tests/test_locus.py b/tests/test_locus.py index 475a018..a1af6fd 100644 --- a/tests/test_locus.py +++ b/tests/test_locus.py @@ -3,7 +3,6 @@ from nose.tools import assert_raises - def test_normalize_chromosome(): assert normalize_chromosome("X") == "X" assert normalize_chromosome("chrX") == "chrX" @@ -39,7 +38,6 @@ def test_normalize_chromosome(): with assert_raises(ValueError): normalize_chromosome(0) - def test_locus_overlaps(): locus = Locus("1", 10, 20, "+") assert locus.overlaps("1", 10, 20, "+") @@ -59,7 +57,6 @@ def test_locus_overlaps(): # wrong strand assert not locus.overlaps("1", 10, 20, "-") - def test_locus_contains(): locus = Locus("1", 10, 20, "+") assert locus.contains("1", 10, 20, "+") @@ -85,7 +82,6 @@ def test_locus_contains(): # wrong strand assert not locus.contains("1", 10, 20, "-") - def test_position_offset(): forward_locus = Locus("1", 10, 20, "+") assert forward_locus.offset(10) == 0 @@ -147,7 +143,6 @@ def test_range_offset(): with assert_raises(ValueError): negative_locus.offset_range(9, 10) - def test_locus_distance(): locus_chr1_10_20_pos = Locus("1", 10, 20, "+") locus_chr1_21_25_pos = Locus("1", 21, 25, "+") diff --git a/tests/test_missing_genome_sources.py b/tests/test_missing_genome_sources.py index f2936ad..5129c18 100644 --- a/tests/test_missing_genome_sources.py +++ b/tests/test_missing_genome_sources.py @@ -14,28 +14,6 @@ MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH = data_path( "mouse.ensembl.81.partial.ENSMUSG00000017167.pep" ) -<<<<<<< HEAD - - -def no_gtf_(cm): - print("Testing for 'GTF' in %s : %s" % (type(cm.exception), cm.exception)) - ok_("GTF" in str(cm.exception)) - - -def no_transcript_(cm): - print( - "Testing for 'transcript' in %s : %s" - % (type(cm.exception), cm.exception) - ) - ok_("transcript" in str(cm.exception)) - - -def no_protein_(cm): - print( - "Testing for 'protein' in %s : %s" % 
(type(cm.exception), cm.exception) - ) - ok_("protein" in str(cm.exception)) -======= def no_gtf_(e): @@ -52,20 +30,12 @@ def no_protein_(e): print("Testing for 'protein' in %s : %s" % (type(e), e)) assert "protein" in str(e) ->>>>>>> upstream/master - def test_transcript_fasta_only(): genome = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", -<<<<<<< HEAD - transcript_fasta_paths_or_urls=[ - MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH - ], -======= transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], ->>>>>>> upstream/master ) genome.index() @@ -92,18 +62,11 @@ def test_transcript_fasta_only(): no_protein_(e) - def test_protein_fasta_only(): genome_only_proteins = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", -<<<<<<< HEAD - protein_fasta_paths_or_urls=[ - MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH - ], -======= protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], ->>>>>>> upstream/master ) genome_only_proteins.index() @@ -118,7 +81,6 @@ def test_protein_fasta_only(): no_transcript_(e) - def test_gtf_only(): genome_only_gtf = Genome( reference_name="GRCm38", @@ -145,13 +107,7 @@ def test_gtf_transcript_only(): reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, -<<<<<<< HEAD - transcript_fasta_paths_or_urls=[ - MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH - ], -======= transcript_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_TRANSCRIPT_FASTA_PATH], ->>>>>>> upstream/master ) genome_gtf_with_cdna.index() @@ -165,19 +121,12 @@ def test_gtf_transcript_only(): no_protein_(e) - def test_gtf_protein_only(): genome_gtf_with_proteins = Genome( reference_name="GRCm38", annotation_name="_test_mouse_ensembl81_subset", gtf_path_or_url=MOUSE_ENSMUSG00000017167_PATH, -<<<<<<< HEAD - protein_fasta_paths_or_urls=[ - MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH - ], -======= protein_fasta_paths_or_urls=[MOUSE_ENSMUSG00000017167_PROTEIN_FASTA_PATH], ->>>>>>> upstream/master ) genome_gtf_with_proteins.index() diff --git a/tests/test_mouse.py b/tests/test_mouse.py index 0ad6f50..fdbf8bd 100644 --- a/tests/test_mouse.py +++ b/tests/test_mouse.py @@ -1,17 +1,7 @@ from .common import eq_ from .data import custom_mouse_genome_grcm38_subset, setup_init_custom_mouse_genome -<<<<<<< HEAD -from .data import ( - custom_mouse_genome_grcm38_subset, - setup_init_custom_mouse_genome, -) - -@with_setup(setup=setup_init_custom_mouse_genome) -======= - ->>>>>>> upstream/master def test_mouse_ENSMUSG00000017167(): """ GTF cropped from ftp://ftp.ensembl.org/pub/release-81/gtf/mus_musculus/ diff --git a/tests/test_release_versions.py b/tests/test_release_versions.py index 68972d9..0be325b 100644 --- a/tests/test_release_versions.py +++ b/tests/test_release_versions.py @@ -8,47 +8,25 @@ def test_version_too_old_1(): EnsemblRelease(1) -<<<<<<< HEAD - -@raises(Exception) -======= ->>>>>>> upstream/master def test_version_too_old_47(): with raises(Exception): EnsemblRelease(47) -<<<<<<< HEAD - -@raises(Exception) -======= ->>>>>>> upstream/master def test_version_is_not_numeric(): with raises(Exception): EnsemblRelease("wuzzle") -<<<<<<< HEAD - -@raises(Exception) -======= ->>>>>>> upstream/master def test_version_is_none(): with raises(Exception): EnsemblRelease(None) - def test_max_ensembl_release(): assert isinstance( MAX_ENSEMBL_RELEASE, int -<<<<<<< HEAD - ), "Unexpected type for MAX_ENSEMBL_RELEASE: %s" % ( - type(MAX_ENSEMBL_RELEASE), - 
) -======= ), "Unexpected type for MAX_ENSEMBL_RELEASE: %s" % (type(MAX_ENSEMBL_RELEASE),) ->>>>>>> upstream/master assert 83 <= MAX_ENSEMBL_RELEASE < 1000, ( "Unexpected value for MAX_ENSEMBL_RELEASE: %d" % MAX_ENSEMBL_RELEASE ) diff --git a/tests/test_search.py b/tests/test_search.py index ed4f448..f4aa8e3 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -4,15 +4,9 @@ from .common import run_multiple_genomes -<<<<<<< HEAD -@test_ensembl_releases() -def test_find_nearest_BRAF_exon(ensembl): - braf = ensembl.genes_by_name("BRAF")[0] -======= @run_multiple_genomes() def test_find_nearest_BRAF_exon(genome): braf = genome.genes_by_name("BRAF")[0] ->>>>>>> upstream/master braf_transcripts = braf.transcripts exons = braf_transcripts[0].exons for exon in exons: @@ -35,40 +29,21 @@ def test_find_nearest_BRAF_exon(genome): eq_(result_after, (1, exon)) -<<<<<<< HEAD -@test_ensembl_releases() -def test_find_nearest_BRAF_transcript(ensembl): - braf_transcript = ensembl.genes_by_name("BRAF")[0].transcripts[0] - egfr_transcript = ensembl.genes_by_name("EGFR")[0].transcripts[0] -======= @run_multiple_genomes() def test_find_nearest_BRAF_transcript(genome): braf_transcript = genome.genes_by_name("BRAF")[0].transcripts[0] egfr_transcript = genome.genes_by_name("EGFR")[0].transcripts[0] ->>>>>>> upstream/master transcripts = [braf_transcript, egfr_transcript] for transcript in transcripts: # immediately before transcript result_before = find_nearest_locus( -<<<<<<< HEAD - start=transcript.start - 2, - end=transcript.start - 1, - loci=transcripts, -======= start=transcript.start - 2, end=transcript.start - 1, loci=transcripts ->>>>>>> upstream/master ) eq_(result_before, (1, transcript)) # overlapping with transcript result_overlap = find_nearest_locus( -<<<<<<< HEAD - start=transcript.start - 2, - end=transcript.start + 1, - loci=transcripts, -======= start=transcript.start - 2, end=transcript.start + 1, loci=transcripts ->>>>>>> upstream/master ) eq_(result_overlap, (0, transcript)) diff --git a/tests/test_sequence_data.py b/tests/test_sequence_data.py index 1d3cc02..1d8b7fd 100644 --- a/tests/test_sequence_data.py +++ b/tests/test_sequence_data.py @@ -18,17 +18,8 @@ def test_sequence_type(): with TemporaryDirectory() as tmpdir: seqs_dna = SequenceData([FASTA_PATH], cache_directory_path=tmpdir) seq = seqs_dna.get("ENSMUST00000138942") -<<<<<<< HEAD - assert ( - seq is not None - ), "Failed to find sequence for ENSMUST00000138942" - assert isinstance( - seq, str - ), "Wrong sequence type, expected %s but got %s" % ( -======= assert seq is not None, "Failed to find sequence for ENSMUST00000138942" assert isinstance(seq, str), "Wrong sequence type, expected %s but got %s" % ( ->>>>>>> upstream/master str, type(seq), ) @@ -44,21 +35,10 @@ def test_missing_sequence(): def test_clear_cache(): with TemporaryDirectory() as tmpdir: seqs = SequenceData([FASTA_PATH], cache_directory_path=tmpdir) -<<<<<<< HEAD - assert ( - not seqs._fasta_dictionary - ), "Expected _fasta_dictionary to load lazily" - - seqs._load_or_create_fasta_dictionary_pickle() - assert ( - len(seqs._fasta_dictionary) > 0 - ), "FASTA dictionary didn't get created" -======= assert not seqs._fasta_dictionary, "Expected _fasta_dictionary to load lazily" seqs._load_or_create_fasta_dictionary_pickle() assert len(seqs._fasta_dictionary) > 0, "FASTA dictionary didn't get created" ->>>>>>> upstream/master seqs.clear_cache() assert ( @@ -71,10 +51,4 @@ def test_clear_cache(): seqs._load_or_create_fasta_dictionary_pickle() for pickle_path in 
seqs.fasta_dictionary_pickle_paths: -<<<<<<< HEAD - assert exists( - pickle_path - ), "Cached pickle file should have been created" -======= assert exists(pickle_path), "Cached pickle file should have been created" ->>>>>>> upstream/master diff --git a/tests/test_serialization.py b/tests/test_serialization.py index c5ab820..738a39a 100644 --- a/tests/test_serialization.py +++ b/tests/test_serialization.py @@ -105,14 +105,8 @@ def test_custom_genome_to_json(): def test_custom_genome_to_dict(): -<<<<<<< HEAD - reconstructed = Genome.from_dict( - custom_mouse_genome_grcm38_subset.to_dict() - ) -======= setup_init_custom_mouse_genome() reconstructed = Genome.from_dict(custom_mouse_genome_grcm38_subset.to_dict()) ->>>>>>> upstream/master eq_(custom_mouse_genome_grcm38_subset, reconstructed) @@ -128,20 +122,9 @@ def test_species_to_pickle(): eq_(human, pickle.loads(pickle.dumps(human))) -<<<<<<< HEAD -@test_ensembl_releases() -def test_unique_memory_address_of_unpickled_genomes(ensembl_genome): - unpickled = pickle.loads(pickle.dumps(ensembl_genome)) - assert ( - ensembl_genome is unpickled - ), "Expected same object for %s but got two different instances" % ( - unpickled, - ) -======= @run_multiple_genomes() def test_unique_memory_address_of_unpickled_genomes(genome): unpickled = pickle.loads(pickle.dumps(genome)) assert ( genome is unpickled ), "Expected same object for %s but got two different instances" % (unpickled,) ->>>>>>> upstream/master diff --git a/tests/test_shell.py b/tests/test_shell.py index ee445ee..9c707f9 100644 --- a/tests/test_shell.py +++ b/tests/test_shell.py @@ -3,9 +3,7 @@ def test_genome_selection_grch38(): - args = parser.parse_args( - ["install", "--release", "100", "--species", "human"] - ) + args = parser.parse_args(["install", "--release", "100", "--species", "human"]) genomes = all_combinations_of_ensembl_genomes(args) assert len(genomes) == 1 genome = genomes[0] diff --git a/tests/test_timings.py b/tests/test_timings.py index 9505ea5..b0fd8e1 100644 --- a/tests/test_timings.py +++ b/tests/test_timings.py @@ -14,13 +14,7 @@ def make_repeat_lookup_fn(lookup_fn, n_positions): def repeat_lookup_fn(): for contig in contigs: -<<<<<<< HEAD - for position in [ - 10**6 + i * 10**6 for i in range(n_positions) - ]: -======= for position in [10**6 + i * 10**6 for i in range(n_positions)]: ->>>>>>> upstream/master lookup_fn(contig, position) return repeat_lookup_fn @@ -34,19 +28,9 @@ def run_benchmark(lookup_fn, n_positions_per_contig=20, time_limit=60.0): repeat_lookup_fn = make_repeat_lookup_fn(lookup_fn, n_positions_per_contig) n_loci = n_positions_per_contig * len(contigs) name = lookup_fn.__name__ -<<<<<<< HEAD - average_time = benchmark( - repeat_lookup_fn, name="%s for %d loci" % (name, n_loci) - ) - print("-- %s : %0.4fs" % (name, average_time)) - assert ( - average_time < time_limit - ), "%s took too long for %s loci: %0.4fs" % ( -======= average_time = benchmark(repeat_lookup_fn, name="%s for %d loci" % (name, n_loci)) print("-- %s : %0.4fs" % (name, average_time)) assert average_time < time_limit, "%s took too long for %s loci: %0.4fs" % ( ->>>>>>> upstream/master name, n_loci, average_time, diff --git a/tests/test_transcript_ids.py b/tests/test_transcript_ids.py index 29291c0..b806608 100644 --- a/tests/test_transcript_ids.py +++ b/tests/test_transcript_ids.py @@ -32,12 +32,7 @@ def test_transcript_ids_ensembl_grch38_hla_a(): transcript_ids = grch38.transcript_ids_at_locus(6, 29941260, 29945884) for transcript_id in HLA_A_TRANSCRIPT_IDS: assert transcript_id in 
transcript_ids, ( -<<<<<<< HEAD - "Transcript %s of HLA-A not found overlapping locus" - % transcript_id -======= "Transcript %s of HLA-A not found overlapping locus" % transcript_id ->>>>>>> upstream/master ) @@ -54,17 +49,9 @@ def test_transcript_ids_ensembl_grch38_hla_a(): def test_all_transcript_ids(genome): transcript_ids = set(genome.transcript_ids()) for transcript_id in KNOWN_TRANSCRIPT_IDS: -<<<<<<< HEAD - assert ( - transcript_id in transcript_ids - ), "Missing transcript ID %s from %s" % ( - transcript_id, - ensembl, -======= assert transcript_id in transcript_ids, "Missing transcript ID %s from %s" % ( transcript_id, genome, ->>>>>>> upstream/master ) diff --git a/tests/test_transcript_objects.py b/tests/test_transcript_objects.py index 2a38aaf..a243e13 100644 --- a/tests/test_transcript_objects.py +++ b/tests/test_transcript_objects.py @@ -22,13 +22,7 @@ def test_transcript_start_codon(): test_transcript_start_codon : Check that fields Transcript (for transcript named CTNNBIP1-004) matches known values. """ -<<<<<<< HEAD - CTNNBIP1_004_transcript = ensembl77.transcript_by_id( - CTNNBIP1_004_transcript_id - ) -======= CTNNBIP1_004_transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) ->>>>>>> upstream/master assert Locus.__eq__( CTNNBIP1_004_transcript, CTNNBIP1_004_locus @@ -66,13 +60,7 @@ def test_transcript_exons(): """ transcript = ensembl77.transcript_by_id(CTNNBIP1_004_transcript_id) exons = transcript.exons -<<<<<<< HEAD - assert isinstance( - exons, list - ), "Expected list of Exon objects, got %s : %s" % ( -======= assert isinstance(exons, list), "Expected list of Exon objects, got %s : %s" % ( ->>>>>>> upstream/master exons, type(exons), ) @@ -80,14 +68,7 @@ def test_transcript_exons(): # CTTNBIP1-004 has 5 exons assert len(exons) == len( CTTNNIP1_004_exon_lengths -<<<<<<< HEAD - ), "Expected %d exons but got %d" % ( - len(CTTNNIP1_004_exon_lengths), - len(exons), - ) -======= ), "Expected %d exons but got %d" % (len(CTTNNIP1_004_exon_lengths), len(exons)) ->>>>>>> upstream/master for i, exon in enumerate(exons): expected_id = CTTNNIP1_004_exon_ids[i] @@ -146,17 +127,7 @@ def test_sequence_parts(genome): combined_sequence_length, len(transcript), "Length 5' UTR(%dnt) + CDS(%dnt) + 3' UTR(%d) = %d, expected %d" -<<<<<<< HEAD - % ( - len(utr5), - len(cds), - len(utr3), - combined_sequence_length, - len(transcript), - ), -======= % (len(utr5), len(cds), len(utr3), combined_sequence_length, len(transcript)), ->>>>>>> upstream/master ) eq_( combined_string, @@ -173,12 +144,7 @@ def test_transcript_utr5_sequence_CTNNIP1_004(): eq_( len(utr5), expected_utr5_length, -<<<<<<< HEAD - "Expected 5' UTR length %d, got %d" - % (expected_utr5_length, len(utr5)), -======= "Expected 5' UTR length %d, got %d" % (expected_utr5_length, len(utr5)), ->>>>>>> upstream/master ) eq_(utr5, CTNNBIP1_004_UTR5) @@ -190,12 +156,7 @@ def test_transcript_utr3_sequence_CTNNIP1_004(): eq_( len(utr3), expected_utr3_length, -<<<<<<< HEAD - "Expected 3' UTR length %d, got %d" - % (expected_utr3_length, len(utr3)), -======= "Expected 3' UTR length %d, got %d" % (expected_utr3_length, len(utr3)), ->>>>>>> upstream/master ) eq_(utr3, CTNNBIP1_004_UTR3) @@ -212,11 +173,7 @@ def test_transcript_cds_CTNNIP1_004(): eq_(cds, CTNNBIP1_004_CDS) -<<<<<<< HEAD -@test_ensembl_releases() -======= @run_multiple_genomes() ->>>>>>> upstream/master def test_equal_transcripts(genome): t1 = genome.genes_by_name("TP53")[0].transcripts[0] # get an identical gene @@ -225,18 +182,13 @@ def 
test_equal_transcripts(genome): eq_(hash(t1), hash(t2)) -<<<<<<< HEAD -@test_ensembl_releases() -======= @run_multiple_genomes() ->>>>>>> upstream/master def test_not_equal_transcripts(genome): t1 = genome.genes_by_name("MUC1")[0].transcripts[0] t2 = genome.genes_by_name("BRCA1")[0].transcripts[0] neq_(t1, t2) - def test_protein_id(): transcript = ensembl77.transcripts_by_name("EGFR-001")[0] eq_(transcript.protein_id, "ENSP00000275493") @@ -253,17 +205,6 @@ def test_transcript_gene_should_match_parent_gene(): eq_(transcript.gene, gene) -<<<<<<< HEAD -@test_ensembl_releases() -def test_BRCA1_201_has_protein_coding_biotype(genome): - transcript = genome.transcripts_by_name("BRCA1-201")[0] - assert transcript.is_protein_coding, ( - "Expected BRCA1-201 transcript %s to have a protein coding biotype in %s" - % ( - transcript, - genome, - ) -======= @run_multiple_genomes() def test_BRCA1_201_has_protein_coding_biotype(genome): transcript = genome.transcripts_by_name("BRCA1-201")[0] @@ -272,6 +213,5 @@ def test_BRCA1_201_has_protein_coding_biotype(genome): ), "Expected BRCA1-201 transcript %s to have a protein coding biotype in %s" % ( transcript, genome, ->>>>>>> upstream/master ) eq_(transcript.biotype, "protein_coding") diff --git a/tests/test_ucsc_gtf.py b/tests/test_ucsc_gtf.py index 57015df..b40c3ff 100644 --- a/tests/test_ucsc_gtf.py +++ b/tests/test_ucsc_gtf.py @@ -13,14 +13,7 @@ def test_ucsc_gencode_gtf(): df = db._load_gtf_as_dataframe() exons = df[df["feature"] == "exon"] # expect 12 exons from the dataframe -<<<<<<< HEAD - assert len(exons) == 12, "Expected 12 exons, got %d: %s" % ( - len(exons), - exons, - ) -======= assert len(exons) == 12, "Expected 12 exons, got %d: %s" % (len(exons), exons) ->>>>>>> upstream/master def test_ucsc_gencode_genome(): @@ -38,23 +31,11 @@ def test_ucsc_gencode_genome(): genome.index() genes = genome.genes() for gene in genes: -<<<<<<< HEAD - assert gene.id, "Gene with missing ID in %s" % ( - genome.gtf.dataframe(), - ) - assert len(genes) == 7, "Expected 7 genes, got %d: %s" % ( - len(genes), - genes, - ) -======= - assert gene.id, "Gene with missing ID in %s" % (genome.gtf.dataframe(),) + assert gene.id, "Gene with missing ID in %s" % (genome,) assert len(genes) == 7, "Expected 7 genes, got %d: %s" % (len(genes), genes) ->>>>>>> upstream/master transcripts = genome.transcripts() for transcript in transcripts: - assert transcript.id, "Transcript with missing ID in %s" % ( - genome.gtf.dataframe(), - ) + assert transcript.id, "Transcript with missing ID in %s" % (genome,) assert len(transcripts) == 7, "Expected 7 transcripts, got %d: %s" % ( len(transcripts), transcripts, @@ -82,14 +63,7 @@ def test_ucsc_refseq_gtf(): df = db._load_gtf_as_dataframe() exons = df[df["feature"] == "exon"] # expect 16 exons from the GTF -<<<<<<< HEAD - assert len(exons) == 16, "Expected 16 exons, got %d: %s" % ( - len(exons), - exons, - ) -======= assert len(exons) == 16, "Expected 16 exons, got %d: %s" % (len(exons), exons) ->>>>>>> upstream/master def test_ucsc_refseq_genome(): @@ -110,14 +84,7 @@ def test_ucsc_refseq_genome(): assert gene.id, "Gene with missing ID in %s" % ( genome.db._load_gtf_as_dataframe(), ) -<<<<<<< HEAD - assert len(genes) == 2, "Expected 2 genes, got %d: %s" % ( - len(genes), - genes, - ) -======= assert len(genes) == 2, "Expected 2 genes, got %d: %s" % (len(genes), genes) ->>>>>>> upstream/master transcripts = genome.transcripts() for transcript in transcripts: assert transcript.id, "Transcript with missing ID in %s" % (