Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Python 2 compatibility shims #979

Merged
merged 32 commits into from
Feb 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
ff57c04
Remove str import from builtins
Harmon758 Feb 7, 2020
e0bf255
Remove unnecessary check for sys.getfilesystemencoding
Harmon758 Feb 7, 2020
142c779
Remove and replace compat.FileType
Harmon758 Feb 7, 2020
584ab08
Remove compat.byte_ord
Harmon758 Feb 7, 2020
e564c2f
Remove and replace compat.bchr
Harmon758 Feb 7, 2020
5444787
Remove and replace compat.mviter
Harmon758 Feb 7, 2020
3f21cb1
Remove compat.range
Harmon758 Feb 7, 2020
d0d2a86
Remove and replace compat.xrange
Harmon758 Feb 7, 2020
91e91b2
Remove and replace compat.unicode
Harmon758 Feb 7, 2020
c30880d
Remove Python 2 check for compat.defenc
Harmon758 Feb 7, 2020
2c4d556
Remove and replace compat.binary_type
Harmon758 Feb 7, 2020
8e55323
Remove and replace compat._unichr
Harmon758 Feb 7, 2020
18fc6b2
Remove and replace compat.bytes_chr
Harmon758 Feb 7, 2020
9615ada
Remove surrogateescape error handler for Python 2
Harmon758 Feb 7, 2020
5549ffe
Remove and replace compat.UnicodeMixin
Harmon758 Feb 7, 2020
60c8dc2
Remove checks for Python 2 and/or 3
Harmon758 Feb 7, 2020
6005b89
Remove Python 2 test
Harmon758 Feb 7, 2020
952eaad
Remove compat.PY3
Harmon758 Feb 7, 2020
266187b
Remove and replace compat.MAXSIZE
Harmon758 Feb 7, 2020
8a8b24e
Remove and replace compat.izip
Harmon758 Feb 7, 2020
07df7c9
Remove and replace compat.string_types
Harmon758 Feb 7, 2020
2f31261
Remove and replace compat.text_type
Harmon758 Feb 7, 2020
369de3d
Remove no longer used compat imports
Harmon758 Feb 7, 2020
92348df
Remove no longer used imports in tests
Harmon758 Feb 7, 2020
ebcdb8b
Remove attempt to import ConfigParser for Python 2
Harmon758 Feb 7, 2020
7f250ca
Remove check for Python 2.7
Harmon758 Feb 7, 2020
21d56e2
Remove unnecessary check for logging.NullHandler for Python 2.6
Harmon758 Feb 7, 2020
d96688f
Improve setup.py python_requires
Harmon758 Feb 7, 2020
d0cd5bf
Remove unnecessary check for PermissionError for Python < 3.3
Harmon758 Feb 7, 2020
a611adc
Add to AUTHORS
Harmon758 Feb 7, 2020
d0899a0
Fix requirements.txt formatting
Harmon758 Feb 7, 2020
c5f5911
Remove now unused is_invoking_git variable in test
Harmon758 Feb 7, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ Contributors are:
-Ben Thayer <ben _at_ benthayer.com>
-Dries Kennes <admin _at_ dries007.net>
-Pratik Anurag <panurag247365 _at_ gmail.com>
-Harmon <harmon.public _at_ gmail.com>
Portions derived from other open source works and are clearly marked.
17 changes: 1 addition & 16 deletions git/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,8 @@
from textwrap import dedent

from git.compat import (
string_types,
defenc,
force_bytes,
PY3,
# just to satisfy flake8 on py3
unicode,
safe_decode,
is_posix,
is_win,
Expand All @@ -43,11 +39,6 @@
stream_copy,
)

try:
PermissionError
except NameError: # Python < 3.3
PermissionError = OSError

execute_kwargs = {'istream', 'with_extended_output',
'with_exceptions', 'as_process', 'stdout_as_string',
'output_stream', 'with_stdout', 'kill_after_timeout',
Expand Down Expand Up @@ -918,18 +909,12 @@ def transform_kwargs(self, split_single_char_options=True, **kwargs):
@classmethod
def __unpack_args(cls, arg_list):
if not isinstance(arg_list, (list, tuple)):
# This is just required for unicode conversion, as subprocess can't handle it
# However, in any other case, passing strings (usually utf-8 encoded) is totally fine
if not PY3 and isinstance(arg_list, unicode):
return [arg_list.encode(defenc)]
return [str(arg_list)]

outlist = []
for arg in arg_list:
if isinstance(arg_list, (list, tuple)):
outlist.extend(cls.__unpack_args(arg))
elif not PY3 and isinstance(arg_list, unicode):
outlist.append(arg_list.encode(defenc))
# END recursion
else:
outlist.append(str(arg))
Expand Down Expand Up @@ -1047,7 +1032,7 @@ def _prepare_ref(self, ref):
if isinstance(ref, bytes):
# Assume 40 bytes hexsha - bin-to-ascii for some reason returns bytes, not text
refstr = ref.decode('ascii')
elif not isinstance(ref, string_types):
elif not isinstance(ref, str):
refstr = str(ref) # could be ref-object

if not refstr.endswith("\n"):
Expand Down
254 changes: 4 additions & 250 deletions git/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,66 +10,23 @@
import locale
import os
import sys
import codecs


from gitdb.utils.compat import (
xrange,
MAXSIZE, # @UnusedImport
izip, # @UnusedImport
)
from gitdb.utils.encoding import (
string_types, # @UnusedImport
text_type, # @UnusedImport
force_bytes, # @UnusedImport
force_text # @UnusedImport
)


PY3 = sys.version_info[0] >= 3
is_win = (os.name == 'nt')
is_posix = (os.name == 'posix')
is_darwin = (os.name == 'darwin')
if hasattr(sys, 'getfilesystemencoding'):
defenc = sys.getfilesystemencoding()
if defenc is None:
defenc = sys.getdefaultencoding()

if PY3:
import io
FileType = io.IOBase

def byte_ord(b):
return b

def bchr(n):
return bytes([n])

def mviter(d):
return d.values()

range = xrange # @ReservedAssignment
unicode = str
binary_type = bytes
else:
FileType = file # @UndefinedVariable on PY3
# usually, this is just ascii, which might not be enough for our encoding needs
# Unless it's set specifically, we override it to be utf-8
if defenc == 'ascii':
defenc = 'utf-8'
byte_ord = ord
bchr = chr
unicode = unicode
binary_type = str
range = xrange # @ReservedAssignment

def mviter(d):
return d.itervalues()
defenc = sys.getfilesystemencoding()


def safe_decode(s):
"""Safely decodes a binary string to unicode"""
if isinstance(s, unicode):
if isinstance(s, str):
return s
elif isinstance(s, bytes):
return s.decode(defenc, 'surrogateescape')
Expand All @@ -79,7 +36,7 @@ def safe_decode(s):

def safe_encode(s):
"""Safely encodes a unicode string to bytes"""
if isinstance(s, unicode):
if isinstance(s, str):
return s.encode(defenc)
elif isinstance(s, bytes):
return s
Expand All @@ -89,7 +46,7 @@ def safe_encode(s):

def win_encode(s):
"""Encode unicodes for process arguments on Windows."""
if isinstance(s, unicode):
if isinstance(s, str):
return s.encode(locale.getpreferredencoding(False))
elif isinstance(s, bytes):
return s
Expand All @@ -106,208 +63,5 @@ class metaclass(meta):
def __new__(cls, name, nbases, d):
if nbases is None:
return type.__new__(cls, name, (), d)
# There may be clients who rely on this attribute to be set to a reasonable value, which is why
# we set the __metaclass__ attribute explicitly
if not PY3 and '___metaclass__' not in d:
d['__metaclass__'] = meta
return meta(name, bases, d)
return metaclass(meta.__name__ + 'Helper', None, {})


## From https://docs.python.org/3.3/howto/pyporting.html
class UnicodeMixin(object):
    """Mixin that derives ``__str__`` from a subclass-provided ``__unicode__``.

    Which ``__str__`` is installed is decided once, when the class body is
    executed: on Python 3 the text from ``__unicode__`` is returned as-is,
    on Python 2 it is encoded with ``defenc`` first.
    """

    if not PY3:  # Python 2
        def __str__(self):
            return self.__unicode__().encode(defenc)
    else:
        def __str__(self):
            return self.__unicode__()


"""
This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error
handler of Python 3.
Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc
"""

# This code is released under the Python license and the BSD 2-clause license


FS_ERRORS = 'surrogateescape'

# # -- Python 2/3 compatibility -------------------------------------
# FS_ERRORS = 'my_surrogateescape'

def u(text):
    """Return *text* unchanged on Python 3, else decode it via 'unicode_escape'."""
    return text if PY3 else text.decode('unicode_escape')

def b(data):
    """Return *data* encoded as latin1 on Python 3, else return it unchanged."""
    return data.encode('latin1') if PY3 else data

# Pick the code-point -> character and code -> single-byte helpers that
# match the running interpreter.
if PY3:
    _unichr = chr

    def bytes_chr(code):
        """Return a bytes object built from the one-element tuple ``(code,)``."""
        return bytes((code,))
else:
    _unichr = unichr
    bytes_chr = chr

def surrogateescape_handler(exc):
    """
    Pure-Python counterpart of the PEP 383 "surrogateescape" error handler.

    The slice of ``exc.object`` between ``exc.start`` and ``exc.end`` is
    handed to ``replace_surrogate_decode`` (for a UnicodeDecodeError) or to
    ``replace_surrogate_encode`` (for a UnicodeEncodeError).

    :param exc: the Unicode error being handled
    :return: tuple ``(replacement, exc.end)``
    :raise: *exc* itself, when it is neither a decode nor an encode error, or
        when the helper signals ``NotASurrogateError``
    """
    fragment = exc.object[exc.start:exc.end]

    if isinstance(exc, UnicodeDecodeError):
        # fragment is a byte-string in this case
        try:
            replacement = replace_surrogate_decode(fragment)
        except NotASurrogateError:
            raise exc
    elif isinstance(exc, UnicodeEncodeError):
        # In the case of u'\udcc3'.encode('ascii',
        # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an
        # exception anyway after this function is called. It seems that the
        # strict encoder is called to encode the unicode string that this
        # function returns ...
        try:
            replacement = replace_surrogate_encode(fragment, exc)
        except NotASurrogateError:
            raise exc
    else:
        raise exc

    return (replacement, exc.end)


class NotASurrogateError(Exception):
    """Raised by the surrogate replacement helpers for input they cannot map."""


def replace_surrogate_encode(mystring, exc):
    """
    Map escaped surrogate characters back to the characters they stand for.

    Returns a (unicode) string, not the more logical bytes, because the codecs
    register_error functionality expects this.

    :param mystring: the characters the encoder could not handle
    :param exc: the original UnicodeEncodeError, re-raised for code points
        outside U+D800..U+DCFF
    :return: string with each U+DCxx character replaced by the character
        whose code point is xx
    :raise NotASurrogateError: for a code point in U+D800..U+DBFF. It has no
        byte to map back to; previously this fell through to
        ``_unichr(code - 0xDC00)`` with a negative argument and raised a
        stray ValueError.
    """
    decoded = []
    for ch in mystring:
        code = ord(ch)

        # The following magic comes from Py3.3's Python/codecs.c file:
        if not 0xD800 <= code <= 0xDCFF:
            # Not a surrogate. Fail with the original exception.
            raise exc
        if code < 0xDC00:
            # U+D800..U+DBFF: nothing to subtract 0xDC00 from.
            raise NotASurrogateError
        decoded.append(_unichr(code - 0xDC00))
    return str().join(decoded)


def replace_surrogate_decode(mybytes):
    """
    Turn undecodable bytes into a (unicode) string.

    Values up to 0x7F map to the character with the same code point; values
    0x80..0xFF map to U+DC80..U+DCFF. Anything larger raises
    NotASurrogateError.
    """
    chars = []
    for item in mybytes:
        # Iterating bytes may yield ints, or length-1 native strings on Py2.
        value = item if isinstance(item, int) else ord(item)
        if value <= 0x7F:
            chars.append(_unichr(value))
        elif value <= 0xFF:
            chars.append(_unichr(0xDC00 + value))
        else:
            raise NotASurrogateError
    return str().join(chars)


def encodefilename(fn):
    """
    Encode the text filename *fn* according to FS_ENCODING.

    'ascii' and 'utf-8' are encoded character by character so that
    U+DC80..U+DCFF turn back into the single bytes 0x80..0xFF. Any other
    encoding is delegated to ``fn.encode(FS_ENCODING, FS_ERRORS)``.

    :raise UnicodeEncodeError: for a non-ASCII, non-escape character under
        'ascii', or for a surrogate outside U+DC80..U+DCFF under 'utf-8'
    """
    if FS_ENCODING == 'ascii':
        # ASCII encoder of Python 2 expects that the error handler returns a
        # Unicode string encodable to ASCII, whereas our surrogateescape error
        # handler has to return bytes in 0x80-0xFF range.
        pieces = []
        for pos, char in enumerate(fn):
            codepoint = ord(char)
            if codepoint < 128:
                piece = bytes_chr(codepoint)
            elif 0xDC80 <= codepoint <= 0xDCFF:
                piece = bytes_chr(codepoint - 0xDC00)
            else:
                raise UnicodeEncodeError(FS_ENCODING,
                                         fn, pos, pos+1,
                                         'ordinal not in range(128)')
            pieces.append(piece)
        return bytes().join(pieces)

    if FS_ENCODING == 'utf-8':
        # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF
        # doesn't go through our error handler
        pieces = []
        for pos, char in enumerate(fn):
            codepoint = ord(char)
            if not 0xD800 <= codepoint <= 0xDFFF:
                pieces.append(char.encode('utf-8'))
            elif 0xDC80 <= codepoint <= 0xDCFF:
                pieces.append(bytes_chr(codepoint - 0xDC00))
            else:
                raise UnicodeEncodeError(
                    FS_ENCODING,
                    fn, pos, pos+1, 'surrogates not allowed')
        return bytes().join(pieces)

    return fn.encode(FS_ENCODING, FS_ERRORS)

def decodefilename(fn):
    """Decode *fn* using FS_ENCODING with the FS_ERRORS error handler."""
    decoded = fn.decode(FS_ENCODING, FS_ERRORS)
    return decoded

# Sample encoding / raw filename / escaped-text triple.
# NOTE(review): `fn` and `encoded` look like leftovers from manual testing;
# nothing in the visible code reads them - confirm before relying on them.
FS_ENCODING = 'ascii'
fn = b('[abc\xff]')
encoded = u('[abc\udcff]')
# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]')
# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]')


# normalize the filesystem encoding name.
# For example, we expect "utf-8", not "UTF8".
FS_ENCODING = codecs.lookup(FS_ENCODING).name


def register_surrogateescape():
    """
    Install ``surrogateescape_handler`` under the name FS_ERRORS on Python 2.

    Does nothing on Python 3, and nothing when a handler is already
    registered under that name.
    """
    if not PY3:
        try:
            codecs.lookup_error(FS_ERRORS)
        except LookupError:
            # No handler under that name yet: provide ours.
            codecs.register_error(FS_ERRORS, surrogateescape_handler)


# Import-time probe: try decoding a byte string containing \x9f with the
# "surrogateescape" error handler. If that raises for any reason, fall back
# to register_surrogateescape().
try:
b"100644 \x9f\0aaa".decode(defenc, "surrogateescape")
except Exception:
register_surrogateescape()
Loading