Skip to content
This repository has been archived by the owner on Mar 31, 2019. It is now read-only.

Commit

Permalink
Python 3's Numba requires Schemas to be hashable for boxing
Browse files Browse the repository at this point in the history
  • Loading branch information
jpivarski committed Feb 2, 2018
1 parent 69f0b75 commit 8255ec1
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 19 deletions.
64 changes: 50 additions & 14 deletions oamap/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,11 @@ def copy(self, **replacements):
def replace(self, fcn, *args, **kwds):
return fcn(Primitive(self._dtype, nullable=self._nullable, data=self._data, mask=self._mask, packing=self._packingcopy(), name=self._name, doc=self._doc, metadata=copy.deepcopy(self._metadata)), *args, **kwds)

def __hash__(self):
    # Hash the class together with the private attributes; metadata is first
    # passed through oamap.util.python2hashable before entering the tuple.
    identity = (
        Primitive,
        self._dtype,
        self._nullable,
        self._data,
        self._mask,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other, memo=None):
return isinstance(other, Primitive) and self.dtype == other.dtype and self.nullable == other.nullable and self.data == other.data and self.mask == other.mask and self.packing == other.packing and self.name == other.name and self.doc == other.doc and self.metadata == other.metadata
return isinstance(other, Primitive) and self._dtype == other._dtype and self._nullable == other._nullable and self._data == other._data and self._mask == other._mask and self._packing == other._packing and self._name == other._name and self._doc == other._doc and self._metadata == other._metadata

def __contains__(self, value, memo=None):
if value is None:
Expand Down Expand Up @@ -782,12 +785,15 @@ def copy(self, **replacements):
def replace(self, fcn, *args, **kwds):
return fcn(List(self._content.replace(fcn, *args, **kwds), nullable=self._nullable, starts=self._starts, stops=self._stops, mask=self._mask, packing=self._packingcopy(), name=self._name, doc=self._doc, metadata=copy.deepcopy(self._metadata)), *args, **kwds)

def __hash__(self):
    # The content schema is hashed as an element of the tuple, so its own
    # __hash__ is invoked; metadata goes through python2hashable first.
    identity = (
        List,
        self._content,
        self._nullable,
        self._starts,
        self._stops,
        self._mask,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other, memo=None):
if memo is None:
memo = {}
if id(self) in memo:
return memo[id(self)] == id(other)
if not (isinstance(other, List) and self.starts == other.starts and self.stops == other.stops and self.mask == other.mask and self.packing == other.packing and self.name == other.name and self.doc == other.doc and self.metadata == other.metadata):
if not (isinstance(other, List) and self._nullable == other._nullable and self._starts == other._starts and self._stops == other._stops and self._mask == other._mask and self._packing == other._packing and self._name == other._name and self._doc == other._doc and self._metadata == other._metadata):
return False
memo[id(self)] = id(other)
return self.content.__eq__(other.content, memo)
Expand Down Expand Up @@ -1033,7 +1039,7 @@ def _fromjson(data, labels):
if not isinstance(data["possibilities"], list):
raise TypeError("argument 'possibilities' for Union Schema from JSON should be a list, not {0}".format(repr(data["possibilities"])))
out = Union.__new__(Union)
out._possibilities = [Schema._fromjson(x, labels) for x in data["possibilities"]]
out.possibilities = [Schema._fromjson(x, labels) for x in data["possibilities"]]
out.nullable = data.get("nullable", False)
out.tags = data.get("tags", None)
out.offsets = data.get("offsets", None)
Expand Down Expand Up @@ -1087,12 +1093,15 @@ def copy(self, **replacements):
def replace(self, fcn, *args, **kwds):
return fcn(Union([x.replace(fcn, *args, **kwds) for x in self._possibilities], nullable=self._nullable, tags=self._tags, offsets=self._offsets, mask=self._mask, packing=self._packingcopy(), name=self._name, doc=self._doc, metadata=copy.deepcopy(self._metadata)), *args, **kwds)

def __hash__(self):
    """Return a hash over the class, possibilities, and the other private attributes.

    The possibilities are converted with ``tuple(...)`` before hashing: a
    list placed inside the hashed tuple raises ``TypeError`` (lists are
    unhashable), and ``_possibilities`` may be held as a list. The order
    is kept because ``__eq__`` compares possibilities pairwise in order.
    """
    identity = (
        Union,
        tuple(self._possibilities),  # no-op if already a tuple; makes a list hashable
        self._nullable,
        self._tags,
        self._offsets,
        self._mask,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other, memo=None):
if memo is None:
memo = {}
if id(self) in memo:
return memo[id(self)] == id(other)
if not (isinstance(other, Union) and len(self.possibilities) == len(other.possibilities) and self.nullable == other.nullable and self.tags == other.tags and self.offsets == other.offsets and self.mask == other.mask and self.packing == other.packing and self.name == other.name and self.doc == other.doc and self.metadata == other.metadata):
if not (isinstance(other, Union) and len(self._possibilities) == len(other._possibilities) and self._nullable == other._nullable and self._tags == other._tags and self._offsets == other._offsets and self._mask == other._mask and self._packing == other._packing and self._name == other._name and self._doc == other._doc and self._metadata == other._metadata):
return False
memo[id(self)] = id(other)
return all(x.__eq__(y, memo) for x, y in zip(self.possibilities, other.possibilities))
Expand All @@ -1101,7 +1110,7 @@ def __contains__(self, value, memo=None):
if memo is None:
memo = {}
if value is None:
return self.nullable or any(x.nullable for x in self.possibilities)
return self._nullable or any(x._nullable for x in self._possibilities)
return any(x.__contains__(value, memo) for x in self.possibilities)

def _get_tags(self, prefix, delimiter):
Expand Down Expand Up @@ -1337,12 +1346,15 @@ def copy(self, **replacements):
def replace(self, fcn, *args, **kwds):
return fcn(Record(OrderedDict((n, x.replace(fcn, *args, **kwds)) for n, x in self._fields.items()), nullable=self._nullable, mask=self._mask, packing=self._packingcopy(), name=self._name, doc=self._doc, metadata=copy.deepcopy(self._metadata)), *args, **kwds)

def __hash__(self):
    """Return a hash that does not depend on the order of the fields.

    ``__eq__`` compares the *set* of field names and then each field by
    name, so two Records that differ only in field order compare equal.
    Equal objects must hash equally, so the (name, schema) pairs are
    hashed as a ``frozenset`` rather than as an ordered tuple.
    """
    identity = (
        Record,
        frozenset(self._fields.items()),  # order-insensitive, matching __eq__
        self._nullable,
        self._mask,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other, memo=None):
if memo is None:
memo = {}
if id(self) in memo:
return memo[id(self)] == id(other)
if not (isinstance(other, Record) and set(self._fields) == set(other._fields) and self.nullable == other.nullable and self.mask == other.mask and self.packing == other.packing and self.name == other.name and self.doc == other.doc and self.metadata == other.metadata):
if not (isinstance(other, Record) and set(self._fields) == set(other._fields) and self._nullable == other._nullable and self._mask == other._mask and self._packing == other._packing and self._name == other._name and self._doc == other._doc and self._metadata == other._metadata):
return False
memo[id(self)] = id(other)
return all(self._fields[n].__eq__(other._fields[n], memo) for n in self._fields)
Expand All @@ -1353,13 +1365,13 @@ def __contains__(self, value, memo=None):
if value is None:
return self.nullable
if isinstance(value, dict):
return all(n in value and x.__contains__(value[n], memo) for n, x in self.fields.items())
return all(n in value and x.__contains__(value[n], memo) for n, x in self._fields.items())
elif isinstance(value, tuple) and hasattr(value, "_fields"):
return all(n in value._fields and x.__contains__(getattr(value, n), memo) for n, x in self.fields.items())
return all(n in value._fields and x.__contains__(getattr(value, n), memo) for n, x in self._fields.items())
elif isinstance(value, (list, tuple)):
return False
else:
return all(hasattr(value, n) and x.__contains__(getattr(value, n), memo) for n, x in self.fields.items())
return all(hasattr(value, n) and x.__contains__(getattr(value, n), memo) for n, x in self._fields.items())

def _get_field(self, prefix, delimiter, n):
return self._get_name(prefix, delimiter) + delimiter + "F" + n
Expand Down Expand Up @@ -1586,12 +1598,15 @@ def copy(self, **replacements):
def replace(self, fcn, *args, **kwds):
return fcn(Tuple([x.replace(fcn, *args, **kwds) for x in self._types], nullable=self._nullable, mask=self._mask, packing=self._packingcopy(), name=self._name, doc=self._doc, metadata=copy.deepcopy(self._metadata)), *args, **kwds)

def __hash__(self):
    """Return a hash over the class, the element types, and the other private attributes.

    The element types are converted with ``tuple(...)`` before hashing so
    that the call cannot fail with ``TypeError`` if ``_types`` is held as
    a list (lists are unhashable). The order is kept because ``__eq__``
    compares the types pairwise in order.
    """
    identity = (
        Tuple,
        tuple(self._types),  # no-op if already a tuple; makes a list hashable
        self._nullable,
        self._mask,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other, memo=None):
if memo is None:
memo = {}
if id(self) in memo:
return memo[id(self)] == id(other)
if not (isinstance(other, Tuple) and len(self._types) == len(other._types) and self.nullable == other.nullable and self.mask == other.mask and self.packing == other.packing and self.name == other.name and self.doc == other.doc and self.metadata == other.metadata):
if not (isinstance(other, Tuple) and len(self._types) == len(other._types) and self._nullable == other._nullable and self._mask == other._mask and self._packing == other._packing and self._name == other._name and self._doc == other._doc and self._metadata == other._metadata):
return False
memo[id(self)] = id(other)
return all(x.__eq__(y, memo) for x, y in zip(self._types, other._types))
Expand All @@ -1601,8 +1616,8 @@ def __contains__(self, value, memo=None):
memo = {}
if value is None:
return self.nullable
if isinstance(value, tuple) and len(value) == len(self.types):
return all(x.__contains__(v, memo) for v, x in zip(value, self.types))
if isinstance(value, tuple) and len(value) == len(self._types):
return all(x.__contains__(v, memo) for v, x in zip(value, self._types))
else:
return False

Expand Down Expand Up @@ -1809,12 +1824,15 @@ def copy(self, **replacements):
def replace(self, fcn, *args, **kwds):
return fcn(Pointer(self._target.replace(fcn, *args, **kwds), nullable=self._nullable, positions=self._positions, mask=self._mask, packing=self._packingcopy(), name=self._name, doc=self._doc, metadata=copy.deepcopy(self._metadata)), *args, **kwds)

def __hash__(self):
    """Return a hash that never descends into the pointer's target.

    ``__eq__`` and ``__contains__`` both carry a ``memo`` to stop when an
    object is revisited, which presumably exists because a Pointer's
    target can lead back to the Pointer itself. ``hash()`` has no such
    guard, so hashing ``self._target`` could recurse without bound on a
    cyclic schema. The target is therefore left out of the hashed tuple.
    This keeps the hash/eq contract (equal Pointers still have equal
    hashes); the only cost is extra collisions between Pointers that
    differ solely in their targets, which ``__eq__`` then resolves.
    """
    identity = (
        Pointer,
        self._nullable,
        self._positions,
        self._mask,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other, memo=None):
if memo is None:
memo = {}
if id(self) in memo:
return memo[id(self)] == id(other)
if not (isinstance(other, Pointer) and self.nullable == other.nullable and self.positions == other.positions and self.mask == other.mask and self.packing == other.packing and self.name == other.name and self.doc == other.doc and self.metadata == other.metadata):
if not (isinstance(other, Pointer) and self._nullable == other._nullable and self._positions == other._positions and self._mask == other._mask and self._packing == other._packing and self._name == other._name and self._doc == other._doc and self._metadata == other._metadata):
return False
memo[id(self)] = id(other)
return self.target.__eq__(other.target, memo)
Expand All @@ -1826,7 +1844,7 @@ def __contains__(self, value, memo=None):
return memo[id(value)] == id(self)
memo[id(value)] = id(self)
if value is None:
return self.nullable
return self._nullable
return self.target.__contains__(value, memo)

def _get_positions(self, prefix, delimiter):
Expand Down Expand Up @@ -1990,6 +2008,15 @@ def empty_partitionlookup(self, delimiter):
def partitionlookup(self, array, delimiter):
return Partitioning.Lookup(array, delimiter, True)

def __hash__(self):
    # Instances hash by their concrete class plus the private key.
    identity = (self.__class__, self._key)
    return hash(identity)

def __eq__(self, other):
    # Equal only to instances of this object's own class (or a subclass of
    # it) that hold an equal key.
    if not isinstance(other, self.__class__):
        return False
    return self._key == other._key

def __ne__(self, other):
    # Defined explicitly as the negation of __eq__.
    same = self.__eq__(other)
    return not same

def tojson(self):
return OrderedDict([(self.__class__.__name__, [self.key])])

Expand Down Expand Up @@ -2255,6 +2282,15 @@ def replace(self, fcn, *args, **kwds):
metadata=copy.deepcopy(self._metadata)),
*args, **kwds)

def __hash__(self):
    # Hash the class with the same private attributes that __eq__ compares;
    # metadata is passed through python2hashable before entering the tuple.
    identity = (
        Dataset,
        self._schema,
        self._prefix,
        self._delimiter,
        self._extension,
        self._partitioning,
        self._packing,
        self._name,
        self._doc,
        oamap.util.python2hashable(self._metadata),
    )
    return hash(identity)

def __eq__(self, other):
    # Anything that is not a Dataset is unequal; otherwise compare the
    # private attributes one by one, stopping at the first mismatch.
    if not isinstance(other, Dataset):
        return False
    return (
        self._schema == other._schema
        and self._prefix == other._prefix
        and self._delimiter == other._delimiter
        and self._extension == other._extension
        and self._partitioning == other._partitioning
        and self._packing == other._packing
        and self._name == other._name
        and self._doc == other._doc
        and self._metadata == other._metadata
    )

def __ne__(self, other):
    # Defined explicitly as the negation of __eq__.
    same = self.__eq__(other)
    return not same

def tojsonfile(self, file, *args, **kwds):
json.dump(self.tojson(), file, *args, **kwds)

Expand Down
4 changes: 2 additions & 2 deletions oamap/source/shelve.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def fromdata(self, key, value, schema=None, inferencelimit=None, partitionlimit=

if dataset.partitioning is None:
for n, x in arrays.items():
self.dbm[_asbytes(self.ARRAY + n)] = x
self.dbm[_asbytes(self.ARRAY + n)] = x.tostring()

else:
partitionlookup = dataset.partitioning.empty_partitionlookup(delimiter)
Expand All @@ -225,7 +225,7 @@ def fromdata(self, key, value, schema=None, inferencelimit=None, partitionlimit=
if key in self:
del self[key]

self.dbm[_asbytes(self.ARRAY + key)] = numpy.array(partitionlookup)
self.dbm[_asbytes(self.ARRAY + key)] = numpy.array(partitionlookup).tostring()
self.dbm[_asbytes(self.DATASET + key)] = dataset.tojsonstring()

for partitionid, (numentries, arrays) in enumerate(oamap.fill.fromiterdata(values, generator=generator, limit=partitionlimit, pointer_fromequal=pointer_fromequal)):
Expand Down
10 changes: 10 additions & 0 deletions oamap/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,3 +169,13 @@ def recurse(value, memo):
return memo[id(value)]

return recurse(value, {})

def python2hashable(value):
    """Convert ``value`` into nested tuples suitable for hashing.

    The value is first passed through ``python2json``; in the result, every
    list becomes a tuple of converted items and every dict becomes a tuple
    of ``(key, converted value)`` pairs ordered by sorted key. Anything
    else is returned unchanged.
    """
    def freeze(node):
        if isinstance(node, list):
            return tuple(map(freeze, node))
        if isinstance(node, dict):
            # Sort the keys so the result does not depend on dict ordering.
            return tuple((key, freeze(node[key])) for key in sorted(node))
        return node

    return freeze(python2json(value))
2 changes: 1 addition & 1 deletion oamap/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

import re

__version__ = "0.10.7"
__version__ = "0.10.8"
version = __version__
version_info = tuple(re.split(r"[-\.]", __version__))

Expand Down
2 changes: 1 addition & 1 deletion tests/test_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_simple(self):
self.assertEqual(d["four"], u"hello")

d["five"] = ["one", b"two", u"three"]
self.assertEqual(d.schema("five"), List(List(Primitive("uint8"), nullable=True, name="UTF8String")))
self.assertEqual(d.schema("five"), List(List(Primitive("uint8"), name="UTF8String")))
self.assertEqual(d["five"], [u"one", u"two", u"three"])

finally:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_shelve.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_simple(self):
self.assertEqual(d["four"], u"hello")

d["five"] = ["one", b"two", u"three"]
self.assertEqual(d.schema("five"), List(List(Primitive("uint8"), nullable=True, name="UTF8String")))
self.assertEqual(d.schema("five"), List(List(Primitive("uint8"), name="UTF8String")))
self.assertEqual(d["five"], [u"one", u"two", u"three"])

finally:
Expand Down

0 comments on commit 8255ec1

Please sign in to comment.