diff --git a/gitdb/db/loose.py b/gitdb/db/loose.py
index 53ade9f..7bf92da 100644
--- a/gitdb/db/loose.py
+++ b/gitdb/db/loose.py
@@ -50,11 +50,11 @@
     stream_copy
 )
 
-from gitdb.utils.compat import MAXSIZE
 from gitdb.utils.encoding import force_bytes
 
 import tempfile
 import os
+import sys
 
 
 __all__ = ('LooseObjectDB', )
@@ -196,7 +196,7 @@ def store(self, istream):
             if istream.binsha is not None:
                 # copy as much as possible, the actual uncompressed item size might
                 # be smaller than the compressed version
-                stream_copy(istream.read, writer.write, MAXSIZE, self.stream_chunk_size)
+                stream_copy(istream.read, writer.write, sys.maxsize, self.stream_chunk_size)
             else:
                 # write object with header, we have to make a new one
                 write_object(istream.type, istream.size, istream.read, writer.write,
diff --git a/gitdb/db/pack.py b/gitdb/db/pack.py
index 1e37d73..177ed7b 100644
--- a/gitdb/db/pack.py
+++ b/gitdb/db/pack.py
@@ -18,7 +18,6 @@
 )
 
 from gitdb.pack import PackEntity
-from gitdb.utils.compat import xrange
 
 from functools import reduce
 
@@ -107,7 +106,7 @@ def sha_iter(self):
         for entity in self.entities():
             index = entity.index()
             sha_by_index = index.sha
-            for index in xrange(index.size()):
+            for index in range(index.size()):
                 yield sha_by_index(index)
             # END for each index
         # END for each entity
diff --git a/gitdb/fun.py b/gitdb/fun.py
index 8ca38c8..9846597 100644
--- a/gitdb/fun.py
+++ b/gitdb/fun.py
@@ -16,7 +16,6 @@
 
 from gitdb.const import NULL_BYTE, BYTE_SPACE
 from gitdb.utils.encoding import force_text
-from gitdb.utils.compat import izip, buffer, xrange, PY3
 from gitdb.typ import (
     str_blob_type,
     str_commit_type,
@@ -101,7 +100,7 @@ def delta_chunk_apply(dc, bbuf, write):
     :param write: write method to call with data to write"""
     if dc.data is None:
         # COPY DATA FROM SOURCE
-        write(buffer(bbuf, dc.so, dc.ts))
+        write(bbuf[dc.so:dc.so + dc.ts])
     else:
         # APPEND DATA
         # whats faster: if + 4 function calls or just a write with a slice ?
@@ -264,7 +263,7 @@ def compress(self):
             # if first_data_index is not None:
             nd = StringIO()                 # new data
             so = self[first_data_index].to  # start offset in target buffer
-            for x in xrange(first_data_index, i - 1):
+            for x in range(first_data_index, i - 1):
                 xdc = self[x]
                 nd.write(xdc.data[:xdc.ts])
             # END collect data
@@ -314,7 +313,7 @@ def check_integrity(self, target_size=-1):
             right.next()
             # this is very pythonic - we might have just use index based access here,
             # but this could actually be faster
-            for lft, rgt in izip(left, right):
+            for lft, rgt in zip(left, right):
                 assert lft.rbound() == rgt.to
                 assert lft.to + lft.ts == rgt.to
             # END for each pair
@@ -424,20 +423,12 @@ def pack_object_header_info(data):
     type_id = (c >> 4) & 7      # numeric type
     size = c & 15               # starting size
     s = 4                       # starting bit-shift size
-    if PY3:
-        while c & 0x80:
-            c = byte_ord(data[i])
-            i += 1
-            size += (c & 0x7f) << s
-            s += 7
-        # END character loop
-    else:
-        while c & 0x80:
-            c = ord(data[i])
-            i += 1
-            size += (c & 0x7f) << s
-            s += 7
-        # END character loop
+    while c & 0x80:
+        c = byte_ord(data[i])
+        i += 1
+        size += (c & 0x7f) << s
+        s += 7
+    # END character loop
     # end performance at expense of maintenance ...
 
     return (type_id, size, i)
@@ -450,28 +441,16 @@ def create_pack_object_header(obj_type, obj_size):
     :param obj_type: pack type_id of the object
     :param obj_size: uncompressed size in bytes of the following object stream"""
     c = 0              # 1 byte
-    if PY3:
-        hdr = bytearray()               # output string
-
-        c = (obj_type << 4) | (obj_size & 0xf)
-        obj_size >>= 4
-        while obj_size:
-            hdr.append(c | 0x80)
-            c = obj_size & 0x7f
-            obj_size >>= 7
-        # END until size is consumed
-        hdr.append(c)
-    else:
-        hdr = bytes()                   # output string
-
-        c = (obj_type << 4) | (obj_size & 0xf)
-        obj_size >>= 4
-        while obj_size:
-            hdr += chr(c | 0x80)
-            c = obj_size & 0x7f
-            obj_size >>= 7
-        # END until size is consumed
-        hdr += chr(c)
+    hdr = bytearray()               # output string
+
+    c = (obj_type << 4) | (obj_size & 0xf)
+    obj_size >>= 4
+    while obj_size:
+        hdr.append(c | 0x80)
+        c = obj_size & 0x7f
+        obj_size >>= 7
+    # END until size is consumed
+    hdr.append(c)
     # end handle interpreter
     return hdr
@@ -484,26 +463,15 @@ def msb_size(data, offset=0):
     i = 0
     l = len(data)
     hit_msb = False
-    if PY3:
-        while i < l:
-            c = data[i + offset]
-            size |= (c & 0x7f) << i * 7
-            i += 1
-            if not c & 0x80:
-                hit_msb = True
-                break
-            # END check msb bit
-        # END while in range
-    else:
-        while i < l:
-            c = ord(data[i + offset])
-            size |= (c & 0x7f) << i * 7
-            i += 1
-            if not c & 0x80:
-                hit_msb = True
-                break
-            # END check msb bit
-        # END while in range
+    while i < l:
+        c = data[i + offset]
+        size |= (c & 0x7f) << i * 7
+        i += 1
+        if not c & 0x80:
+            hit_msb = True
+            break
+        # END check msb bit
+    # END while in range
     # end performance ...
     if not hit_msb:
         raise AssertionError("Could not find terminating MSB byte in data stream")
@@ -663,93 +631,48 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
     **Note:** transcribed to python from the similar routine in patch-delta.c"""
     i = 0
     db = delta_buf
-    if PY3:
-        while i < delta_buf_size:
-            c = db[i]
-            i += 1
-            if c & 0x80:
-                cp_off, cp_size = 0, 0
-                if (c & 0x01):
-                    cp_off = db[i]
-                    i += 1
-                if (c & 0x02):
-                    cp_off |= (db[i] << 8)
-                    i += 1
-                if (c & 0x04):
-                    cp_off |= (db[i] << 16)
-                    i += 1
-                if (c & 0x08):
-                    cp_off |= (db[i] << 24)
-                    i += 1
-                if (c & 0x10):
-                    cp_size = db[i]
-                    i += 1
-                if (c & 0x20):
-                    cp_size |= (db[i] << 8)
-                    i += 1
-                if (c & 0x40):
-                    cp_size |= (db[i] << 16)
-                    i += 1
-
-                if not cp_size:
-                    cp_size = 0x10000
-
-                rbound = cp_off + cp_size
-                if (rbound < cp_size or
-                        rbound > src_buf_size):
-                    break
-                write(buffer(src_buf, cp_off, cp_size))
-            elif c:
-                write(db[i:i + c])
-                i += c
-            else:
-                raise ValueError("unexpected delta opcode 0")
-            # END handle command byte
-        # END while processing delta data
-    else:
-        while i < delta_buf_size:
-            c = ord(db[i])
-            i += 1
-            if c & 0x80:
-                cp_off, cp_size = 0, 0
-                if (c & 0x01):
-                    cp_off = ord(db[i])
-                    i += 1
-                if (c & 0x02):
-                    cp_off |= (ord(db[i]) << 8)
-                    i += 1
-                if (c & 0x04):
-                    cp_off |= (ord(db[i]) << 16)
-                    i += 1
-                if (c & 0x08):
-                    cp_off |= (ord(db[i]) << 24)
-                    i += 1
-                if (c & 0x10):
-                    cp_size = ord(db[i])
-                    i += 1
-                if (c & 0x20):
-                    cp_size |= (ord(db[i]) << 8)
-                    i += 1
-                if (c & 0x40):
-                    cp_size |= (ord(db[i]) << 16)
-                    i += 1
-
-                if not cp_size:
-                    cp_size = 0x10000
-
-                rbound = cp_off + cp_size
-                if (rbound < cp_size or
-                        rbound > src_buf_size):
-                    break
-                write(buffer(src_buf, cp_off, cp_size))
-            elif c:
-                write(db[i:i + c])
-                i += c
-            else:
-                raise ValueError("unexpected delta opcode 0")
-            # END handle command byte
-        # END while processing delta data
-    # end save byte_ord call and prevent performance regression in py2
+    while i < delta_buf_size:
+        c = db[i]
+        i += 1
+        if c & 0x80:
+            cp_off, cp_size = 0, 0
+            if (c & 0x01):
+                cp_off = db[i]
+                i += 1
+            if (c & 0x02):
+                cp_off |= (db[i] << 8)
+                i += 1
+            if (c & 0x04):
+                cp_off |= (db[i] << 16)
+                i += 1
+            if (c & 0x08):
+                cp_off |= (db[i] << 24)
+                i += 1
+            if (c & 0x10):
+                cp_size = db[i]
+                i += 1
+            if (c & 0x20):
+                cp_size |= (db[i] << 8)
+                i += 1
+            if (c & 0x40):
+                cp_size |= (db[i] << 16)
+                i += 1
+
+            if not cp_size:
+                cp_size = 0x10000
+
+            rbound = cp_off + cp_size
+            if (rbound < cp_size or
+                    rbound > src_buf_size):
+                break
+            write(src_buf[cp_off:cp_off + cp_size])
+        elif c:
+            write(db[i:i + c])
+            i += c
+        else:
+            raise ValueError("unexpected delta opcode 0")
+        # END handle command byte
+    # END while processing delta data
 
     # yes, lets use the exact same error message that git uses :)
     assert i == delta_buf_size, "delta replay has gone wild"
diff --git a/gitdb/pack.py b/gitdb/pack.py
index 2ad2324..a38468e 100644
--- a/gitdb/pack.py
+++ b/gitdb/pack.py
@@ -62,12 +62,6 @@
 from binascii import crc32
 
 from gitdb.const import NULL_BYTE
-from gitdb.utils.compat import (
-    izip,
-    buffer,
-    xrange,
-    to_bytes
-)
 
 import tempfile
 import array
@@ -119,7 +113,7 @@ def pack_object_at(cursor, offset, as_stream):
     # END handle type id
     abs_data_offset = offset + total_rela_offset
     if as_stream:
-        stream = DecompressMemMapReader(buffer(data, total_rela_offset), False, uncomp_size)
+        stream = DecompressMemMapReader(data[total_rela_offset:], False, uncomp_size)
         if delta_info is None:
             return abs_data_offset, OPackStream(offset, type_id, uncomp_size, stream)
         else:
@@ -207,7 +201,7 @@ def write(self, pack_sha, write):
         for t in self._objs:
             tmplist[byte_ord(t[0][0])] += 1
         # END prepare fanout
-        for i in xrange(255):
+        for i in range(255):
             v = tmplist[i]
             sha_write(pack('>L', v))
             tmplist[i + 1] += v
@@ -376,7 +370,7 @@ def _read_fanout(self, byte_offset):
         d = self._cursor.map()
         out = list()
         append = out.append
-        for i in xrange(256):
+        for i in range(256):
             append(unpack_from('>L', d, byte_offset + i * 4)[0])
         # END for each entry
         return out
@@ -410,14 +404,14 @@ def offsets(self):
         if self._version == 2:
             # read stream to array, convert to tuple
             a = array.array('I')    # 4 byte unsigned int, long are 8 byte on 64 bit it appears
-            a.frombytes(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))
+            a.frombytes(self._cursor.map()[self._pack_offset:self._pack_64_offset])
 
             # networkbyteorder to something array likes more
             if sys.byteorder == 'little':
                 a.byteswap()
             return a
         else:
-            return tuple(self.offset(index) for index in xrange(self.size()))
+            return tuple(self.offset(index) for index in range(self.size()))
         # END handle version
 
     def sha_to_index(self, sha):
@@ -696,7 +690,7 @@ def _set_cache_(self, attr):
             iter_offsets = iter(offsets_sorted)
             iter_offsets_plus_one = iter(offsets_sorted)
             next(iter_offsets_plus_one)
-            consecutive = izip(iter_offsets, iter_offsets_plus_one)
+            consecutive = zip(iter_offsets, iter_offsets_plus_one)
 
             offset_map = dict(consecutive)
 
@@ -716,7 +710,7 @@ def _iter_objects(self, as_stream):
         """Iterate over all objects in our index and yield their OInfo or OStream instences"""
         _sha = self._index.sha
         _object = self._object
-        for index in xrange(self._index.size()):
+        for index in range(self._index.size()):
             yield _object(_sha(index), as_stream, index)
         # END for each index
 
@@ -838,7 +832,7 @@ def is_valid_stream(self, sha, use_crc=False):
                 while cur_pos < next_offset:
                     rbound = min(cur_pos + chunk_size, next_offset)
                     size = rbound - cur_pos
-                    this_crc_value = crc_update(buffer(pack_data, cur_pos, size), this_crc_value)
+                    this_crc_value = crc_update(pack_data[cur_pos:cur_pos + size], this_crc_value)
                     cur_pos += size
                 # END window size loop
 
@@ -882,7 +876,11 @@ def collect_streams_at_offset(self, offset):
         stream = streams[-1]
         while stream.type_id in delta_types:
             if stream.type_id == REF_DELTA:
-                sindex = self._index.sha_to_index(to_bytes(stream.delta_info))
+                # smmap can return memory view objects, which can't be compared as buffers/bytes can ...
+                if isinstance(stream.delta_info, memoryview):
+                    sindex = self._index.sha_to_index(stream.delta_info.tobytes())
+                else:
+                    sindex = self._index.sha_to_index(stream.delta_info)
                 if sindex is None:
                     break
                 stream = self._pack.stream(self._index.offset(sindex))
diff --git a/gitdb/stream.py b/gitdb/stream.py
index 2f4c12d..b94ef24 100644
--- a/gitdb/stream.py
+++ b/gitdb/stream.py
@@ -27,7 +27,6 @@
 )
 
 from gitdb.const import NULL_BYTE, BYTE_SPACE
-from gitdb.utils.compat import buffer
 from gitdb.utils.encoding import force_bytes
 
 has_perf_mod = False
@@ -278,7 +277,7 @@ def read(self, size=-1):
             # END adjust winsize
 
             # takes a slice, but doesn't copy the data, it says ...
-            indata = buffer(self._m, self._cws, self._cwe - self._cws)
+            indata = self._m[self._cws:self._cwe]
 
             # get the actual window end to be sure we don't use it for computations
             self._cwe = self._cws + len(indata)
@@ -414,7 +413,7 @@ def _set_cache_brute_(self, attr):
             buf = dstream.read(512)            # read the header information + X
             offset, src_size = msb_size(buf)
             offset, target_size = msb_size(buf, offset)
-            buffer_info_list.append((buffer(buf, offset), offset, src_size, target_size))
+            buffer_info_list.append((buf[offset:], offset, src_size, target_size))
             max_target_size = max(max_target_size, target_size)
         # END for each delta stream
 
diff --git a/gitdb/test/db/lib.py b/gitdb/test/db/lib.py
index 528bcc1..c6f4316 100644
--- a/gitdb/test/db/lib.py
+++ b/gitdb/test/db/lib.py
@@ -23,7 +23,6 @@
 
 from gitdb.exc import BadObject
 from gitdb.typ import str_blob_type
-from gitdb.utils.compat import xrange
 
 from io import BytesIO
 
@@ -45,7 +44,7 @@ def _assert_object_writing_simple(self, db):
         # write a bunch of objects and query their streams and info
         null_objs = db.size()
         ni = 250
-        for i in xrange(ni):
+        for i in range(ni):
             data = pack(">L", i)
             istream = IStream(str_blob_type, len(data), BytesIO(data))
             new_istream = db.store(istream)
diff --git a/gitdb/test/lib.py b/gitdb/test/lib.py
index 42b9ddc..a04084f 100644
--- a/gitdb/test/lib.py
+++ b/gitdb/test/lib.py
@@ -4,7 +4,6 @@
 # the New BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Utilities used in ODB testing"""
 from gitdb import OStream
-from gitdb.utils.compat import xrange
 
 import sys
 import random
@@ -151,7 +150,7 @@ def make_bytes(size_in_bytes, randomize=False):
     """:return: string with given size in bytes
     :param randomize: try to produce a very random stream"""
     actual_size = size_in_bytes // 4
-    producer = xrange(actual_size)
+    producer = range(actual_size)
     if randomize:
         producer = list(producer)
         random.shuffle(producer)
diff --git a/gitdb/test/performance/test_pack.py b/gitdb/test/performance/test_pack.py
index fc8d9d5..b59d5a9 100644
--- a/gitdb/test/performance/test_pack.py
+++ b/gitdb/test/performance/test_pack.py
@@ -17,7 +17,6 @@
 from gitdb.typ import str_blob_type
 from gitdb.exc import UnsupportedOperation
 from gitdb.db.pack import PackedDB
-from gitdb.utils.compat import xrange
 from gitdb.test.lib import skip_on_travis_ci
 
 import sys
@@ -118,7 +117,7 @@ def test_correctness(self):
         for entity in pdb.entities():
             pack_verify = entity.is_valid_stream
             sha_by_index = entity.index().sha
-            for index in xrange(entity.index().size()):
+            for index in range(entity.index().size()):
                 try:
                     assert pack_verify(sha_by_index(index), use_crc=crc)
                     count += 1
diff --git a/gitdb/test/test_pack.py b/gitdb/test/test_pack.py
index 24e2a31..8bf78f0 100644
--- a/gitdb/test/test_pack.py
+++ b/gitdb/test/test_pack.py
@@ -25,12 +25,6 @@
 from gitdb.fun import delta_types
 from gitdb.exc import UnsupportedOperation
 from gitdb.util import to_bin_sha
-from gitdb.utils.compat import xrange
-
-try:
-    from itertools import izip
-except ImportError:
-    izip = zip
 
 from nose import SkipTest
 
@@ -63,7 +57,7 @@ def _assert_index_file(self, index, version, size):
         assert len(index.offsets()) == size
 
         # get all data of all objects
-        for oidx in xrange(index.size()):
+        for oidx in range(index.size()):
             sha = index.sha(oidx)
             assert oidx == index.sha_to_index(sha)
 
@@ -155,7 +149,7 @@ def test_pack_entity(self, rw_dir):
             pack_objs.extend(entity.stream_iter())
 
             count = 0
-            for info, stream in izip(entity.info_iter(), entity.stream_iter()):
+            for info, stream in zip(entity.info_iter(), entity.stream_iter()):
                 count += 1
                 assert info.binsha == stream.binsha
                 assert len(info.binsha) == 20
diff --git a/gitdb/utils/compat.py b/gitdb/utils/compat.py
deleted file mode 100644
index a7899cb..0000000
--- a/gitdb/utils/compat.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import sys
-
-PY3 = sys.version_info[0] == 3
-
-try:
-    from itertools import izip
-    xrange = xrange
-except ImportError:
-    # py3
-    izip = zip
-    xrange = range
-# end handle python version
-
-try:
-    # Python 2
-    buffer = buffer
-    memoryview = buffer
-    # Assume no memory view ...
-    def to_bytes(i):
-        return i
-except NameError:
-    # Python 3 has no `buffer`; only `memoryview`
-    # However, it's faster to just slice the object directly, maybe it keeps a view internally
-    def buffer(obj, offset, size=None):
-        if size is None:
-            # return memoryview(obj)[offset:]
-            return obj[offset:]
-        else:
-            # return memoryview(obj)[offset:offset+size]
-            return obj[offset:offset + size]
-    # end buffer reimplementation
-    # smmap can return memory view objects, which can't be compared as buffers/bytes can ...
-    def to_bytes(i):
-        if isinstance(i, memoryview):
-            return i.tobytes()
-        return i
-
-    memoryview = memoryview
-
-try:
-    MAXSIZE = sys.maxint
-except AttributeError:
-    MAXSIZE = sys.maxsize
diff --git a/gitdb/utils/encoding.py b/gitdb/utils/encoding.py
index 4d270af..25ebead 100644
--- a/gitdb/utils/encoding.py
+++ b/gitdb/utils/encoding.py
@@ -1,31 +1,18 @@
-from gitdb.utils import compat
-
-if compat.PY3:
-    string_types = (str, )
-    text_type = str
-else:
-    string_types = (basestring, )
-    text_type = unicode
-
-
 def force_bytes(data, encoding="ascii"):
     if isinstance(data, bytes):
         return data
 
-    if isinstance(data, string_types):
+    if isinstance(data, str):
         return data.encode(encoding)
 
     return data
 
 
 def force_text(data, encoding="utf-8"):
-    if isinstance(data, text_type):
+    if isinstance(data, str):
         return data
 
     if isinstance(data, bytes):
         return data.decode(encoding)
 
-    if compat.PY3:
-        return text_type(data, encoding)
-    else:
-        return text_type(data)
+    return str(data, encoding)
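
Side note on the gitdb/fun.py hunks above: pack_object_header_info and create_pack_object_header implement git's variable-length pack header encoding, where the first byte carries a 3-bit type and the low 4 size bits, and each continuation byte contributes 7 more size bits, with the high bit set while more bytes follow. Below is a minimal standalone round-trip sketch of that scheme; encode_header and decode_header are illustrative names, not gitdb's API.

# Standalone sketch of the pack-header varint scheme the patch simplifies
# to the Python-3-only code path. Hypothetical names, not gitdb functions.

def encode_header(obj_type, obj_size):
    c = (obj_type << 4) | (obj_size & 0xf)   # 3-bit type, low 4 size bits
    obj_size >>= 4
    hdr = bytearray()
    while obj_size:
        hdr.append(c | 0x80)                 # MSB set: more bytes follow
        c = obj_size & 0x7f
        obj_size >>= 7
    hdr.append(c)                            # final byte has MSB clear
    return bytes(hdr)

def decode_header(data):
    c = data[0]
    type_id = (c >> 4) & 7
    size = c & 15
    i, s = 1, 4
    while c & 0x80:                          # consume continuation bytes
        c = data[i]
        i += 1
        size += (c & 0x7f) << s
        s += 7
    return type_id, size, i                  # i == number of bytes consumed

assert decode_header(encode_header(1, 123456)) == (1, 123456, 3)

On Python 3, indexing bytes yields an int directly, which is why the patch can drop the ord()/byte_ord() split between the PY3 and py2 branches and keep a single loop.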