V0.13rc #231

Merged · 12 commits · Apr 13, 2024
6 changes: 3 additions & 3 deletions .github/workflows/main.yml
@@ -4,7 +4,7 @@ jobs:
build-and-test:
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@@ -30,7 +30,7 @@ jobs:
conda config --system --set channel_priority strict
mamba create -y -n gffutils-env \
python=${{ matrix.python-version }} \
--file requirements.txt
bedtools

conda activate gffutils-env
python setup.py clean sdist
@@ -43,9 +43,9 @@ jobs:
run: |
source "${HOME}/conda/etc/profile.d/conda.sh"
source "${HOME}/conda/etc/profile.d/mamba.sh"
mamba install -y -n gffutils-env --file optional-requirements.txt pytest hypothesis

conda activate gffutils-env
pip install pytest hypothesis biopython pybedtools
pytest -v --doctest-modules gffutils
conda deactivate

11 changes: 11 additions & 0 deletions doc/source/changelog.rst
@@ -3,6 +3,17 @@
Change log
==========

v0.13
-----

- Document options for avoiding deadlocks when simultaneously reading/writing
to a db on disk (fixes `#227
<https://github.com/daler/gffutils/issues/227>`__).
- Support later versions of BioPython (fixes `#228
<https://github.com/daler/gffutils/issues/228>`__).
- Drop support for Python 3.7 and unused ``six`` dependency; support Python
3.11 and 3.12 (fixes `#223 <https://github.com/daler/gffutils/issues/223>`__).

v0.12
-----

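Most of the Python changes in the files that follow are the mechanical removal of the ``six`` compatibility layer noted in the changelog. A minimal standalone sketch of the substitutions applied throughout (plain Python, not gffutils code):

    # six-era idiom                      Python 3 replacement used in this PR
    # isinstance(x, six.string_types) -> isinstance(x, str)
    # six.iteritems(d)                -> d.items()
    # six.text_type(x, "utf-8")       -> str(x, "utf-8")

    attrs = {"ID": ["gene1"], "Name": ["abc"]}
    for key, values in attrs.items():   # formerly six.iteritems(attrs)
        print(key, values)

    line = "chr1\tsrc\tgene\t1\t100\t.\t+\t.\tID=gene1"
    if isinstance(line, str):           # formerly isinstance(line, six.string_types)
        fields = line.split("\t")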
3 changes: 1 addition & 2 deletions gffutils/attributes.py
@@ -1,4 +1,3 @@
import six
import collections

try:
@@ -95,7 +94,7 @@ def __str__(self):
return "\n".join(s)

def update(self, *args, **kwargs):
for k, v in six.iteritems(dict(*args, **kwargs)):
for k, v in dict(*args, **kwargs).items():
self[k] = v


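The rewritten ``update`` above keeps the dict-style calling convention while routing every assignment through ``__setitem__``; a small illustration of the same pattern with a hypothetical stand-in class:

    class AttributesLike(dict):
        # Mirror gffutils.attributes: accept the same arguments as dict.update()
        # but assign via __setitem__ so subclass hooks still run.
        def update(self, *args, **kwargs):
            for k, v in dict(*args, **kwargs).items():
                self[k] = v

    a = AttributesLike()
    a.update({"ID": ["gene1"]}, Name=["abc"])
    print(a)   # {'ID': ['gene1'], 'Name': ['abc']}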
17 changes: 9 additions & 8 deletions gffutils/biopython_integration.py
@@ -2,7 +2,6 @@
Module for integration with BioPython, specifically SeqRecords and SeqFeature
objects.
"""
import six

try:
from Bio.SeqFeature import SeqFeature, FeatureLocation
@@ -15,7 +14,8 @@
_biopython_strand = {
"+": 1,
"-": -1,
".": 0,
".": None,
"?": 0,
}
_feature_strand = dict((v, k) for k, v in _biopython_strand.items())

@@ -33,7 +33,7 @@ def to_seqfeature(feature):
If string, assume it is a GFF or GTF-format line; otherwise just use
the provided feature directly.
"""
if isinstance(feature, six.string_types):
if isinstance(feature, str):
feature = feature_from_line(feature)

qualifiers = {
@@ -46,10 +46,11 @@ def to_seqfeature(feature):
return SeqFeature(
# Convert from GFF 1-based to standard Python 0-based indexing used by
# BioPython
FeatureLocation(feature.start - 1, feature.stop),
FeatureLocation(
feature.start - 1, feature.stop, strand=_biopython_strand[feature.strand]
),
id=feature.id,
type=feature.featuretype,
strand=_biopython_strand[feature.strand],
qualifiers=qualifiers,
)

@@ -66,12 +67,12 @@ def from_seqfeature(s, **kwargs):
score = s.qualifiers.get("score", ".")[0]
seqid = s.qualifiers.get("seqid", ".")[0]
frame = s.qualifiers.get("frame", ".")[0]
strand = _feature_strand[s.strand]
strand = _feature_strand[s.location.strand]

# BioPython parses 1-based GenBank positions into 0-based for use within
# Python. We need to convert back to 1-based GFF format here.
start = s.location.start.position + 1
stop = s.location.end.position
start = s.location.start + 1
stop = s.location.end
featuretype = s.type
id = s.id
attributes = dict(s.qualifiers)
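Beyond the ``six`` cleanup, the functional change here is that the strand now lives on the ``FeatureLocation`` rather than being passed to ``SeqFeature`` (later BioPython versions dropped it there), and GFF '.' now maps to ``None`` while '?' maps to ``0``. A hedged round-trip sketch, assuming a recent BioPython (>= 1.81) is installed:

    from Bio.SeqFeature import SeqFeature, FeatureLocation

    gff_to_bio = {"+": 1, "-": -1, ".": None, "?": 0}
    bio_to_gff = {v: k for k, v in gff_to_bio.items()}

    # GFF is 1-based inclusive; BioPython locations are 0-based half-open.
    start, stop, strand = 100, 200, "+"
    loc = FeatureLocation(start - 1, stop, strand=gff_to_bio[strand])
    feat = SeqFeature(loc, type="exon", id="exon1")

    # Converting back recovers the original GFF coordinates and strand.
    assert int(feat.location.start) + 1 == start
    assert int(feat.location.end) == stop
    assert bio_to_gff[feat.location.strand] == strand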
4 changes: 1 addition & 3 deletions gffutils/convert.py
@@ -2,8 +2,6 @@
Conversion functions that operate on :class:`FeatureDB` classes.
"""

import six


def to_bed12(f, db, child_type="exon", name_field="ID"):
"""
@@ -22,7 +20,7 @@ def to_bed12(f, db, child_type="exon", name_field="ID"):
Attribute to be used in the "name" field of the BED12 entry. Usually
"ID" for GFF; "transcript_id" for GTF.
"""
if isinstance(f, six.string_types):
if isinstance(f, str):
f = db[f]
children = list(db.children(f, featuretype=child_type, order_by="start"))
sizes = [len(i) for i in children]
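``to_bed12`` is otherwise unchanged: it accepts either a Feature or a plain ``str`` ID. A usage sketch in which the database path and transcript ID are hypothetical placeholders:

    import gffutils
    from gffutils.convert import to_bed12

    db = gffutils.FeatureDB("annotation.db")    # hypothetical database
    transcript = db["FBtr0300689"]              # hypothetical transcript ID
    print(to_bed12(transcript, db, child_type="exon", name_field="ID"))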
14 changes: 7 additions & 7 deletions gffutils/create.py
@@ -5,7 +5,6 @@
import sys
import os
import sqlite3
import six
from textwrap import dedent
from gffutils import constants
from gffutils import version
@@ -119,7 +118,7 @@ def __init__(
os.unlink(dbfn)
self.dbfn = dbfn
self.id_spec = id_spec
if isinstance(dbfn, six.string_types):
if isinstance(dbfn, str):
conn = sqlite3.connect(dbfn)
else:
conn = dbfn
@@ -171,7 +170,7 @@ def _id_handler(self, f):
"""

# If id_spec is a string or callable, convert to iterable for later
if isinstance(self.id_spec, six.string_types):
if isinstance(self.id_spec, str):
id_key = [self.id_spec]
elif hasattr(self.id_spec, "__call__"):
id_key = [self.id_spec]
@@ -181,7 +180,7 @@ def _id_handler(self, f):
elif isinstance(self.id_spec, dict):
try:
id_key = self.id_spec[f.featuretype]
if isinstance(id_key, six.string_types):
if isinstance(id_key, str):
id_key = [id_key]

# Otherwise, use default auto-increment.
@@ -217,7 +216,7 @@ def _id_handler(self, f):
"a single value is required for a primary key in the "
"database. Consider using a custom id_spec to "
"convert these multiple values into a single "
"value".format(k))
"value".format(k)
)
except KeyError:
pass
try:
@@ -684,7 +684,7 @@ def _update_relations(self):
# c.execute('CREATE INDEX childindex ON relations (child)')
# self.conn.commit()

if isinstance(self._keep_tempfiles, six.string_types):
if isinstance(self._keep_tempfiles, str):
suffix = self._keep_tempfiles
else:
suffix = ".gffutils"
@@ -883,7 +883,7 @@ def _update_relations(self):
msg = "transcript"
logger.info("Inferring %s extents " "and writing to tempfile" % msg)

if isinstance(self._keep_tempfiles, six.string_types):
if isinstance(self._keep_tempfiles, str):
suffix = self._keep_tempfiles
else:
suffix = ".gffutils"
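The ``_id_handler`` logic touched above accepts ``id_spec`` as a string, a callable, a list, or a per-featuretype dict whose values must resolve to a single primary-key value. A hedged usage sketch with hypothetical file names:

    import gffutils

    db = gffutils.create_db(
        "annotation.gtf",                # hypothetical input
        dbfn="annotation.db",            # hypothetical output
        force=True,
        keep_order=True,
        # One ID-bearing attribute per featuretype, matching the dict branch.
        id_spec={"gene": "gene_id", "transcript": "transcript_id"},
        merge_strategy="merge",
    )
    print(db.count_features_of_type("gene"))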
12 changes: 4 additions & 8 deletions gffutils/feature.py
@@ -1,5 +1,4 @@
from pyfaidx import Fasta
import six
import simplejson as json
from gffutils import constants
from gffutils import helpers
@@ -166,7 +165,7 @@ def __init__(
# for testing.
attributes = attributes or dict_class()

if isinstance(attributes, six.string_types):
if isinstance(attributes, str):
try:
attributes = helpers._unjsonify(attributes, isattributes=True)

@@ -182,7 +181,7 @@
# If string, then try un-JSONifying it into a list; if that doesn't
# work then assume it's tab-delimited and convert to a list.
extra = extra or []
if isinstance(extra, six.string_types):
if isinstance(extra, str):
try:
extra = helpers._unjsonify(extra)
except json.JSONDecodeError:
@@ -254,10 +253,7 @@ def __setitem__(self, key, value):
self.attributes[key] = value

def __str__(self):
if six.PY3:
return self.__unicode__()
else:
return unicode(self).encode("utf-8")
return self.__unicode__()

def __unicode__(self):

@@ -387,7 +383,7 @@ def sequence(self, fasta, use_strand=True):
-------
string
"""
if isinstance(fasta, six.string_types):
if isinstance(fasta, str):
fasta = Fasta(fasta, as_raw=False)

# recall GTF/GFF is 1-based closed; pyfaidx uses Python slice notation
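With the Python 2 branch gone, ``__str__`` always returns the unicode GFF line, and ``sequence`` still accepts either a path or an open ``pyfaidx.Fasta``. A short sketch; the GFF line is made up and the FASTA call is left commented since it needs a real file:

    from gffutils.feature import feature_from_line

    line = "chr1\texample\texon\t11\t20\t.\t-\t.\tID=exon1"
    exon = feature_from_line(line)

    print(str(exon))                      # plain unicode GFF text on Python 3
    # seq = exon.sequence("genome.fa")    # 1-based inclusive coords; reverse-
    #                                     # complemented for '-' when use_strand=True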
3 changes: 1 addition & 2 deletions gffutils/gffwriter.py
@@ -1,7 +1,6 @@
##
## GFF Writer (writer): serializing gffutils records as GFF text files.
##
import six
import tempfile
import shutil
from time import strftime, localtime
@@ -41,7 +40,7 @@ def __init__(self, out, with_header=True, in_place=False):
self.temp_file = None
# Output stream to write to
self.out_stream = None
if isinstance(out, six.string_types):
if isinstance(out, str):
if self.in_place:
# Use temporary file
self.temp_file = tempfile.NamedTemporaryFile(delete=False)
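For context, ``GFFWriter``'s ``in_place`` mode writes to a ``NamedTemporaryFile`` and then swaps it over the original; the same standard-library pattern in isolation (file name and transform are illustrative only):

    import shutil
    import tempfile

    def rewrite_in_place(path, transform):
        # Stream the transformed lines to a temp file, then replace the original.
        with tempfile.NamedTemporaryFile("w", delete=False) as tmp:
            with open(path) as infile:
                for line in infile:
                    tmp.write(transform(line))
        shutil.move(tmp.name, path)

    # rewrite_in_place("annotations.gff3", lambda line: line)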
38 changes: 27 additions & 11 deletions gffutils/helpers.py
@@ -4,7 +4,6 @@
import simplejson as json
import time
import tempfile
import six
from gffutils import constants
from gffutils import bins
import gffutils
@@ -202,7 +201,7 @@ def make_query(
# e.g., "featuretype = 'exon'"
#
# or, "featuretype IN ('exon', 'CDS')"
if isinstance(featuretype, six.string_types):
if isinstance(featuretype, str):
d["FEATURETYPE"] = "features.featuretype = ?"
args.append(featuretype)
else:
@@ -218,7 +217,7 @@ def make_query(
# `limit` is a string or a tuple of (chrom, start, stop)
#
# e.g., "seqid = 'chr2L' AND start > 1000 AND end < 5000"
if isinstance(limit, six.string_types):
if isinstance(limit, str):
seqid, startstop = limit.split(":")
start, end = startstop.split("-")
else:
@@ -257,7 +256,7 @@ def make_query(
# Default is essentially random order.
#
# e.g. "ORDER BY seqid, start DESC"
if isinstance(order_by, six.string_types):
if isinstance(order_by, str):
_order_by.append(order_by)

else:
@@ -387,7 +386,7 @@ def merge_attributes(attr1, attr2, numeric_sort=False):
if not isinstance(v, list):
new_d[k] = [v]

for k, v in six.iteritems(attr1):
for k, v in attr1.items():
if k in attr2:
if not isinstance(v, list):
v = [v]
@@ -507,9 +506,9 @@ def is_gff_db(db_fname):


def to_unicode(obj, encoding="utf-8"):
if isinstance(obj, six.string_types):
if not isinstance(obj, six.text_type):
obj = six.text_type(obj, encoding)
if isinstance(obj, str):
if not isinstance(obj, str):
obj = str(obj, encoding)
return obj


@@ -520,7 +519,6 @@ def canonical_transcripts(db, fasta_filename):
"""
import pyfaidx


fasta = pyfaidx.Fasta(fasta_filename, as_raw=False)
for gene in db.features_of_type("gene"):

@@ -536,7 +534,20 @@ def canonical_transcripts(db, fasta_filename):
cds_len += exon_length
total_len += exon_length

exon_list.append((cds_len, total_len, transcript, exons if cds_len == 0 else [e for e in exons if e.featuretype in ['CDS', 'five_prime_UTR', 'three_prime_UTR']]))
exon_list.append(
(
cds_len,
total_len,
transcript,
exons
if cds_len == 0
else [
e
for e in exons
if e.featuretype in ["CDS", "five_prime_UTR", "three_prime_UTR"]
],
)
)

# If we have CDS, then use the longest coding transcript
if max(i[0] for i in exon_list) > 0:
@@ -549,7 +560,12 @@

canonical_exons = best[-1]
transcript = best[-2]
seqs = [i.sequence(fasta) for i in sorted(canonical_exons, key=lambda x: x.start, reverse=transcript.strand != '+')]
seqs = [
i.sequence(fasta)
for i in sorted(
canonical_exons, key=lambda x: x.start, reverse=transcript.strand != "+"
)
]
yield transcript, "".join(seqs)


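``canonical_transcripts`` (reformatted above, behaviour unchanged) yields, per gene, the longest coding transcript, falling back to the longest transcript when no CDS is present, together with its spliced sequence. A hedged usage sketch with placeholder file names:

    import gffutils
    from gffutils.helpers import canonical_transcripts

    db = gffutils.FeatureDB("annotation.db")            # hypothetical database
    for transcript, spliced_seq in canonical_transcripts(db, "genome.fa"):
        print(transcript.id, len(spliced_seq))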