From 4e781216792e2af28e73a4d9cb7551e34c2c0887 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Wed, 15 Jun 2022 15:09:53 -0500 Subject: [PATCH 01/22] add np serialization and tests --- datajoint/blob.py | 12 +++++++++++- tests/test_blob.py | 17 +++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index e6ff2a581..edbc4164f 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -141,6 +141,7 @@ def read_blob(self, n_bytes=None): "F": self.read_recarray, # numpy array with fields, including recarrays "d": self.read_decimal, # a decimal "t": self.read_datetime, # date, time, or datetime + "T": self.read_np_datetime, # np.datetime64 "u": self.read_uuid, # UUID }[data_structure_code] except KeyError: @@ -182,7 +183,9 @@ def pack_blob(self, obj): return self.pack_array(np.array(obj)) if isinstance(obj, (float, int, complex)): return self.pack_array(np.array(obj)) - if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): + if isinstance( + obj, (datetime.datetime, datetime.date, datetime.time, np.datetime64) + ): return self.pack_datetime(obj) if isinstance(obj, Decimal): return self.pack_decimal(obj) @@ -504,12 +507,18 @@ def read_datetime(self): ) return time and date and datetime.datetime.combine(date, time) or time or date + def read_np_datetime(self): + data = self.read_value() + return data.astype("datetime64[us]") + @staticmethod def pack_datetime(d): if isinstance(d, datetime.datetime): date, time = d.date(), d.time() elif isinstance(d, datetime.date): date, time = d, None + elif isinstance(d, np.datetime64): + return b"T" + (d.astype("datetime64[us]")).tobytes() else: date, time = None, d return b"t" + ( @@ -542,6 +551,7 @@ def read_value(self, dtype=None, count=1): if dtype is None: dtype = "uint32" if use_32bit_dims else "uint64" data = np.frombuffer(self._blob, dtype=dtype, count=count, offset=self._pos) + # print(data, flush=True) self._pos += data.dtype.itemsize * data.size return data[0] if count == 1 else data diff --git a/tests/test_blob.py b/tests/test_blob.py index 6e7943399..f92099d50 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -1,4 +1,5 @@ import datajoint as dj +import timeit import numpy as np import uuid from . import schema @@ -149,6 +150,9 @@ def test_pack(): x == unpack(pack(x)), "Numpy string array object did not pack/unpack correctly" ) + x = np.datetime64("1998").astype("datetime64[us]") + assert_true(x == unpack(pack(x))) + def test_recarrays(): x = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", float), ("y", int)]) @@ -222,3 +226,16 @@ def test_insert_longblob(): } (schema.Longblob & "id=1").delete() dj.blob.use_32bit_dims = False + + +def test_datetime_serialization_speed(): + # If this fails that means for some reason deserializing/serializing + # np.datetime64 types is now slower than regular datetime + + numpy_exe_time = timeit.timeit( + "unpack(pack(np.datetime64('now')))", number=100, globals=globals() + ) + python_exe_time = timeit.timeit( + "unpack(pack(datetime.now()))", number=100, globals=globals() + ) + assert numpy_exe_time < python_exe_time From b65338906ace14844deb4fed896853b3f9743eba Mon Sep 17 00:00:00 2001 From: jverswijver Date: Wed, 15 Jun 2022 15:14:44 -0500 Subject: [PATCH 02/22] remove print statement. --- datajoint/blob.py | 1 - 1 file changed, 1 deletion(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index edbc4164f..0cdc26810 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -551,7 +551,6 @@ def read_value(self, dtype=None, count=1): if dtype is None: dtype = "uint32" if use_32bit_dims else "uint64" data = np.frombuffer(self._blob, dtype=dtype, count=count, offset=self._pos) - # print(data, flush=True) self._pos += data.dtype.itemsize * data.size return data[0] if count == 1 else data From d379dfdad8f4b63f2909b4055213dedd072e343b Mon Sep 17 00:00:00 2001 From: jverswijver Date: Mon, 20 Jun 2022 11:07:58 -0500 Subject: [PATCH 03/22] add way to serialize np.arrays of datetime64 --- datajoint/blob.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 0cdc26810..4a5f50c73 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -141,7 +141,8 @@ def read_blob(self, n_bytes=None): "F": self.read_recarray, # numpy array with fields, including recarrays "d": self.read_decimal, # a decimal "t": self.read_datetime, # date, time, or datetime - "T": self.read_np_datetime, # np.datetime64 + "T": self.read_int64_datetime, # np.datetime64 + "Z": self.read_np_array_dt64, "u": self.read_uuid, # UUID }[data_structure_code] except KeyError: @@ -243,6 +244,9 @@ def pack_array(self, array): """ Serialize an np.ndarray into bytes. Scalars are encoded with ndim=0. """ + if "datetime64" in array.dtype.name: + self.set_dj0() + return b"Z" + array.astype("datetime64[us]").tobytes() blob = ( b"A" + np.uint64(array.ndim).tobytes() @@ -429,6 +433,10 @@ def pack_dict(self, d): ) ) + def read_np_array_dt64(self): + data = self.read_value(dtype=" Date: Mon, 20 Jun 2022 11:13:39 -0500 Subject: [PATCH 04/22] remove refrences to numpy in function names --- datajoint/blob.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 4a5f50c73..2ba8e0cfb 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -142,7 +142,7 @@ def read_blob(self, n_bytes=None): "d": self.read_decimal, # a decimal "t": self.read_datetime, # date, time, or datetime "T": self.read_int64_datetime, # np.datetime64 - "Z": self.read_np_array_dt64, + "Z": self.read_array_dt64, "u": self.read_uuid, # UUID }[data_structure_code] except KeyError: @@ -433,7 +433,7 @@ def pack_dict(self, d): ) ) - def read_np_array_dt64(self): + def read_array_dt64(self): data = self.read_value(dtype=" Date: Mon, 20 Jun 2022 11:36:38 -0500 Subject: [PATCH 05/22] update tests. --- tests/test_blob.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/test_blob.py b/tests/test_blob.py index f92099d50..1a13021cc 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -230,12 +230,19 @@ def test_insert_longblob(): def test_datetime_serialization_speed(): # If this fails that means for some reason deserializing/serializing - # np.datetime64 types is now slower than regular datetime + # np arrays of np.datetime64 types is now slower than regular arrays of datetime64 - numpy_exe_time = timeit.timeit( - "unpack(pack(np.datetime64('now')))", number=100, globals=globals() + np_array_dt_exe_time = timeit.timeit( + setup='myarr=pack(np.array([np.datetime64(f"{x}") for x in range(1900, 2000)]))', + stmt="unpack(myarr)", + number=10, + globals=globals(), ) - python_exe_time = timeit.timeit( - "unpack(pack(datetime.now()))", number=100, globals=globals() + python_array_dt_exe_time = timeit.timeit( + setup='myarr2=pack([np.datetime64(f"{x}") for x in range(1900, 2000)])', + stmt="unpack(myarr2)", + number=10, + globals=globals(), ) - assert numpy_exe_time < python_exe_time + + assert np_array_dt_exe_time < python_array_dt_exe_time From ae7b93cba495f76fd3f567d7c436565013f1141d Mon Sep 17 00:00:00 2001 From: jverswijver Date: Mon, 20 Jun 2022 15:17:20 -0500 Subject: [PATCH 06/22] refactor datetime serialization to use scalar dictionary --- datajoint/blob.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 2ba8e0cfb..8df6b11b6 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -34,6 +34,7 @@ ("mxINT64_CLASS", np.dtype("int64")), ("mxUINT64_CLASS", np.dtype("uint64")), ("mxFUNCTION_CLASS", None), + ("NON_mx_DATETIME64", np.dtype(" Date: Tue, 21 Jun 2022 10:17:08 -0500 Subject: [PATCH 07/22] remove refrences to matlab types to make scalars more language agnostic --- datajoint/blob.py | 52 +++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 8df6b11b6..8d436e9ef 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -14,33 +14,33 @@ from .settings import config -mxClassID = dict( +scalar_id = dict( ( # see http://www.mathworks.com/help/techdoc/apiref/mxclassid.html - ("mxUNKNOWN_CLASS", None), - ("mxCELL_CLASS", None), - ("mxSTRUCT_CLASS", None), - ("mxLOGICAL_CLASS", np.dtype("bool")), - ("mxCHAR_CLASS", np.dtype("c")), - ("mxVOID_CLASS", np.dtype("O")), - ("mxDOUBLE_CLASS", np.dtype("float64")), - ("mxSINGLE_CLASS", np.dtype("float32")), - ("mxINT8_CLASS", np.dtype("int8")), - ("mxUINT8_CLASS", np.dtype("uint8")), - ("mxINT16_CLASS", np.dtype("int16")), - ("mxUINT16_CLASS", np.dtype("uint16")), - ("mxINT32_CLASS", np.dtype("int32")), - ("mxUINT32_CLASS", np.dtype("uint32")), - ("mxINT64_CLASS", np.dtype("int64")), - ("mxUINT64_CLASS", np.dtype("uint64")), - ("mxFUNCTION_CLASS", None), - ("NON_mx_DATETIME64", np.dtype(" Date: Tue, 21 Jun 2022 11:21:29 -0500 Subject: [PATCH 08/22] update matlab type codes to be explicit --- datajoint/blob.py | 28 ++++++++++++++++++++++++---- tests/test_blob.py | 4 ++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 8d436e9ef..8e2c1e0ad 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -38,7 +38,23 @@ ) ) -rev_class_id = {dtype: i for i, dtype in enumerate(scalar_id.values())} +# Matlab numeric codes +matlab_scalar_mapping = { + np.dtype("bool"): 3, # LOGICAL + np.dtype("c"): 4, # CHAR + np.dtype("O"): 5, # VOID + np.dtype("float64"): 6, # DOUBLE + np.dtype("float32"): 7, # SINGLE + np.dtype("int8"): 8, # INT8 + np.dtype("uint8"): 9, # UINT8 + np.dtype("int16"): 10, # INT16 + np.dtype("uint16"): 11, # UINT16 + np.dtype("int32"): 12, # INT32 + np.dtype("uint32"): 13, # UINT32 + np.dtype("int64"): 14, # INT64 + np.dtype("uint64"): 15, # UINT64 +} + dtype_list = list(scalar_id.values()) type_names = list(scalar_id) @@ -256,9 +272,13 @@ def pack_array(self, array): if is_complex: array, imaginary = np.real(array), np.imag(array) type_id = ( - rev_class_id[array.dtype] - if array.dtype.char != "U" - else rev_class_id[np.dtype("O")] + matlab_scalar_mapping[np.dtype("O")] + if array.dtype not in matlab_scalar_mapping + else ( + matlab_scalar_mapping[array.dtype] + if array.dtype.char != "U" + else matlab_scalar_mapping[np.dtype("O")] + ) ) if dtype_list[type_id] is None: raise DataJointError("Type %s is ambiguous or unknown" % array.dtype) diff --git a/tests/test_blob.py b/tests/test_blob.py index 1a13021cc..ce4cf3993 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -233,13 +233,13 @@ def test_datetime_serialization_speed(): # np arrays of np.datetime64 types is now slower than regular arrays of datetime64 np_array_dt_exe_time = timeit.timeit( - setup='myarr=pack(np.array([np.datetime64(f"{x}") for x in range(1900, 2000)]))', + setup="myarr=pack(np.array([np.datetime64(f'{x}') for x in range(1900, 2000)]))", stmt="unpack(myarr)", number=10, globals=globals(), ) python_array_dt_exe_time = timeit.timeit( - setup='myarr2=pack([np.datetime64(f"{x}") for x in range(1900, 2000)])', + setup="myarr2=pack([datetime.now() for x in range (1900, 2000)])", stmt="unpack(myarr2)", number=10, globals=globals(), From 50ef3d1e0a2c3dfb598371064d372ea76953ad47 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Tue, 21 Jun 2022 11:23:17 -0500 Subject: [PATCH 09/22] change variable name --- datajoint/blob.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 8e2c1e0ad..211e1f00a 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -14,7 +14,7 @@ from .settings import config -scalar_id = dict( +scalar_types = dict( ( # see http://www.mathworks.com/help/techdoc/apiref/mxclassid.html ("UNKNOWN", None), From c0f9af41e595665e5294611bc9a0dfcc59b683ea Mon Sep 17 00:00:00 2001 From: jverswijver Date: Tue, 21 Jun 2022 11:24:19 -0500 Subject: [PATCH 10/22] fix small bug --- datajoint/blob.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 211e1f00a..ab7890271 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -14,7 +14,7 @@ from .settings import config -scalar_types = dict( +scalar_type = dict( ( # see http://www.mathworks.com/help/techdoc/apiref/mxclassid.html ("UNKNOWN", None), @@ -55,8 +55,8 @@ np.dtype("uint64"): 15, # UINT64 } -dtype_list = list(scalar_id.values()) -type_names = list(scalar_id) +dtype_list = list(scalar_type.values()) +type_names = list(scalar_type) compression = {b"ZL123\0": zlib.decompress} From 8596c4437b2a3ed62d24755eeb93c81bb376f254 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Thu, 23 Jun 2022 10:39:42 -0500 Subject: [PATCH 11/22] refactor blob.py --- datajoint/blob.py | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index ab7890271..8ede4aff9 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -38,8 +38,7 @@ ) ) -# Matlab numeric codes -matlab_scalar_mapping = { +scalar_codes = { np.dtype("bool"): 3, # LOGICAL np.dtype("c"): 4, # CHAR np.dtype("O"): 5, # VOID @@ -53,10 +52,16 @@ np.dtype("uint32"): 13, # UINT32 np.dtype("int64"): 14, # INT64 np.dtype("uint64"): 15, # UINT64 + np.dtype( + " Date: Mon, 27 Jun 2022 13:52:14 -0500 Subject: [PATCH 12/22] update changelog, fix logger, pin networkx version. --- CHANGELOG.md | 5 +++++ datajoint/logging.py | 7 +------ docs-parts/intro/Releases_lang1.rst | 6 ++++++ requirements.txt | 2 +- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5436bc1db..7f95a016a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## Release notes +### 0.13.7 -- TBD +* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 +* Add - Support for serializing numpy datetime64 types PR #1036 +* Update - Add traceback to default logging PR #1036 + ### 0.13.6 -- Jun 13, 2022 * Add - Config option to set threshold for when to stop using checksums for filepath stores. PR #1025 * Add - Unified package level logger for package (#667) PR #1031 diff --git a/datajoint/logging.py b/datajoint/logging.py index bf0d97716..5f0f3eb06 100644 --- a/datajoint/logging.py +++ b/datajoint/logging.py @@ -21,12 +21,7 @@ def excepthook(exc_type, exc_value, exc_traceback): sys.__excepthook__(exc_type, exc_value, exc_traceback) return - if logger.getEffectiveLevel() == 10: - logger.debug( - "Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback) - ) - else: - logger.error(f"Uncaught exception: {exc_value}") + logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback)) sys.excepthook = excepthook diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 6a803eebe..de28136f5 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,3 +1,9 @@ +0.13.7 -- TBD +-------------- +* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 +* Add - Support for serializing numpy datetime64 types PR #1036 +* Update - Add traceback to default logging PR #1036 + 0.13.6 -- Jun 13, 2022 ---------------------- * Add - Config option to set threshold for when to stop using checksums for filepath stores. PR #1025 diff --git a/requirements.txt b/requirements.txt index 65c0c8b6f..5f1e7961e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ pyparsing ipython pandas tqdm -networkx +networkx<2.8.3 pydot minio>=7.0.0 matplotlib From fd24525a3b78697514ca547af9ea33b67f963970 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Mon, 27 Jun 2022 14:40:48 -0500 Subject: [PATCH 13/22] update changelog --- CHANGELOG.md | 4 ++-- docs-parts/intro/Releases_lang1.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f95a016a..0224d58c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,8 @@ ## Release notes ### 0.13.7 -- TBD -* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 -* Add - Support for serializing numpy datetime64 types PR #1036 +* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 (#1035) +* Add - Support for serializing numpy datetime64 types PR #1036 (#1022) * Update - Add traceback to default logging PR #1036 ### 0.13.6 -- Jun 13, 2022 diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index de28136f5..343820547 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,7 +1,7 @@ 0.13.7 -- TBD -------------- -* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 -* Add - Support for serializing numpy datetime64 types PR #1036 +* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 (#1035) +* Add - Support for serializing numpy datetime64 types PR #1036 (#1022) * Update - Add traceback to default logging PR #1036 0.13.6 -- Jun 13, 2022 From bea178126b4acc2f0d52cc08408b88098eac4df7 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Thu, 30 Jun 2022 14:38:42 -0500 Subject: [PATCH 14/22] apply suggestions from code review. --- datajoint/blob.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 8ede4aff9..37e951dde 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -163,7 +163,6 @@ def read_blob(self, n_bytes=None): "F": self.read_recarray, # numpy array with fields, including recarrays "d": self.read_decimal, # a decimal "t": self.read_datetime, # date, time, or datetime - "T": self.read_int64_datetime, # np.datetime64 "u": self.read_uuid, # UUID }[data_structure_code] except KeyError: @@ -199,15 +198,13 @@ def pack_blob(self, obj): return self.pack_float(obj) if isinstance(obj, np.ndarray) and obj.dtype.fields: return self.pack_recarray(np.array(obj)) - if isinstance(obj, np.number): + if isinstance(obj, (np.number, np.datetime64)): return self.pack_array(np.array(obj)) if isinstance(obj, (bool, np.bool_)): return self.pack_array(np.array(obj)) if isinstance(obj, (float, int, complex)): return self.pack_array(np.array(obj)) - if isinstance( - obj, (datetime.datetime, datetime.date, datetime.time, np.datetime64) - ): + if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): return self.pack_datetime(obj) if isinstance(obj, Decimal): return self.pack_decimal(obj) @@ -538,18 +535,12 @@ def read_datetime(self): ) return time and date and datetime.datetime.combine(date, time) or time or date - def read_int64_datetime(self): - data = self.read_value() - return data.astype("datetime64[us]") - @staticmethod def pack_datetime(d): if isinstance(d, datetime.datetime): date, time = d.date(), d.time() elif isinstance(d, datetime.date): date, time = d, None - elif isinstance(d, np.datetime64): - return b"T" + (d.astype("datetime64[us]")).tobytes() else: date, time = None, d return b"t" + ( From 42b0537dbc350a6a73dc4365d7cf0f7716bd3267 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Thu, 7 Jul 2022 09:19:50 -0500 Subject: [PATCH 15/22] refactor --- datajoint/blob.py | 100 +++++++++++++++++++++------------------------- requirements.txt | 2 +- 2 files changed, 47 insertions(+), 55 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 37e951dde..bef39ce6f 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -14,53 +14,42 @@ from .settings import config -scalar_type = dict( - ( - # see http://www.mathworks.com/help/techdoc/apiref/mxclassid.html - ("UNKNOWN", None), - ("CELL", None), - ("STRUCT", None), - ("LOGICAL", np.dtype("bool")), - ("CHAR", np.dtype("c")), - ("VOID", np.dtype("O")), - ("DOUBLE", np.dtype("float64")), - ("SINGLE", np.dtype("float32")), - ("INT8", np.dtype("int8")), - ("UINT8", np.dtype("uint8")), - ("INT16", np.dtype("int16")), - ("UINT16", np.dtype("uint16")), - ("INT32", np.dtype("int32")), - ("UINT32", np.dtype("uint32")), - ("INT64", np.dtype("int64")), - ("UINT64", np.dtype("uint64")), - ("FUNCTION", None), - ("DATETIME64", np.dtype("=7.0.0 matplotlib From 333b44b2a0b2ee27ef20de1209313e2cf750b6c9 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Thu, 7 Jul 2022 10:45:32 -0500 Subject: [PATCH 16/22] fix bug --- datajoint/blob.py | 7 ++++--- requirements.txt | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index bef39ce6f..8124bbfec 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -41,9 +41,10 @@ 134: {"dtype": np.dtype("=7.0.0 matplotlib From 5d8bc72129d11a3bfe9f9a142e500760b433d332 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Tue, 12 Jul 2022 10:53:08 -0500 Subject: [PATCH 17/22] apply suggestions from code review. --- datajoint/blob.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index 8124bbfec..f4e3f192e 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -32,19 +32,19 @@ 14: {"dtype": np.dtype("int64"), "scalar_type": "INT64"}, 15: {"dtype": np.dtype("uint64"), "scalar_type": "UINT64"}, 16: {"dtype": None, "scalar_type": "FUNCTION"}, - 128: {"dtype": np.dtype(" Date: Tue, 12 Jul 2022 11:10:11 -0500 Subject: [PATCH 18/22] fix bug --- datajoint/blob.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/datajoint/blob.py b/datajoint/blob.py index f4e3f192e..a0c1e5455 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -32,19 +32,19 @@ 14: {"dtype": np.dtype("int64"), "scalar_type": "INT64"}, 15: {"dtype": np.dtype("uint64"), "scalar_type": "UINT64"}, 16: {"dtype": None, "scalar_type": "FUNCTION"}, - 65536: {"dtype": np.dtype(" Date: Tue, 12 Jul 2022 16:17:28 -0500 Subject: [PATCH 19/22] small updates. --- CHANGELOG.md | 4 ++-- datajoint/blob.py | 32 ++++++++++++++--------------- docs-parts/intro/Releases_lang1.rst | 6 +++--- tests/test_blob.py | 11 +++++----- 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0224d58c2..616fa5eb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ ## Release notes -### 0.13.7 -- TBD -* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 (#1035) +### 0.13.7 -- Jul 13, 2022 +* Bugfix - Fix networkx incompatable change by version pinning to 2.6.3 PR #1036 (#1035) * Add - Support for serializing numpy datetime64 types PR #1036 (#1022) * Update - Add traceback to default logging PR #1036 diff --git a/datajoint/blob.py b/datajoint/blob.py index a0c1e5455..9f4a148ca 100644 --- a/datajoint/blob.py +++ b/datajoint/blob.py @@ -32,19 +32,19 @@ 14: {"dtype": np.dtype("int64"), "scalar_type": "INT64"}, 15: {"dtype": np.dtype("uint64"), "scalar_type": "UINT64"}, 16: {"dtype": None, "scalar_type": "FUNCTION"}, - 65536: {"dtype": np.dtype("datetime64[Y]"), "scalar_type": "DATETIME64[Y]"}, - 65537: {"dtype": np.dtype("datetime64[M]"), "scalar_type": "DATETIME64[M]"}, - 65538: {"dtype": np.dtype("datetime64[W]"), "scalar_type": "DATETIME64[W]"}, - 65539: {"dtype": np.dtype("datetime64[D]"), "scalar_type": "DATETIME64[D]"}, - 65540: {"dtype": np.dtype("datetime64[h]"), "scalar_type": "DATETIME64[h]"}, - 65541: {"dtype": np.dtype("datetime64[m]"), "scalar_type": "DATETIME64[m]"}, - 65542: {"dtype": np.dtype("datetime64[s]"), "scalar_type": "DATETIME64[s]"}, - 65543: {"dtype": np.dtype("datetime64[ms]"), "scalar_type": "DATETIME64[ms]"}, - 65544: {"dtype": np.dtype("datetime64[us]"), "scalar_type": "DATETIME64[us]"}, - 65545: {"dtype": np.dtype("datetime64[ns]"), "scalar_type": "DATETIME64[ns]"}, - 65546: {"dtype": np.dtype("datetime64[ps]"), "scalar_type": "DATETIME64[ps]"}, - 65547: {"dtype": np.dtype("datetime64[fs]"), "scalar_type": "DATETIME64[fs]"}, - 65548: {"dtype": np.dtype("datetime64[as]"), "scalar_type": "DATETIME64[as]"}, + 65_536: {"dtype": np.dtype("datetime64[Y]"), "scalar_type": "DATETIME64[Y]"}, + 65_537: {"dtype": np.dtype("datetime64[M]"), "scalar_type": "DATETIME64[M]"}, + 65_538: {"dtype": np.dtype("datetime64[W]"), "scalar_type": "DATETIME64[W]"}, + 65_539: {"dtype": np.dtype("datetime64[D]"), "scalar_type": "DATETIME64[D]"}, + 65_540: {"dtype": np.dtype("datetime64[h]"), "scalar_type": "DATETIME64[h]"}, + 65_541: {"dtype": np.dtype("datetime64[m]"), "scalar_type": "DATETIME64[m]"}, + 65_542: {"dtype": np.dtype("datetime64[s]"), "scalar_type": "DATETIME64[s]"}, + 65_543: {"dtype": np.dtype("datetime64[ms]"), "scalar_type": "DATETIME64[ms]"}, + 65_544: {"dtype": np.dtype("datetime64[us]"), "scalar_type": "DATETIME64[us]"}, + 65_545: {"dtype": np.dtype("datetime64[ns]"), "scalar_type": "DATETIME64[ns]"}, + 65_546: {"dtype": np.dtype("datetime64[ps]"), "scalar_type": "DATETIME64[ps]"}, + 65_547: {"dtype": np.dtype("datetime64[fs]"), "scalar_type": "DATETIME64[fs]"}, + 65_548: {"dtype": np.dtype("datetime64[as]"), "scalar_type": "DATETIME64[as]"}, } serialize_lookup = { v["dtype"]: {"type_id": k, "scalar_type": v["scalar_type"]} @@ -269,14 +269,13 @@ def pack_array(self, array): try: type_id = serialize_lookup[array.dtype]["type_id"] except KeyError: + # U is for unicode string if array.dtype.char == "U": type_id = serialize_lookup[np.dtype("O")]["type_id"] - pass else: - raise DataJointError("Type %s is ambiguous or unknown" % array.dtype) + raise DataJointError(f"Type {array.dtype} is ambiguous or unknown") blob += np.array([type_id, is_complex], dtype=np.uint32).tobytes() - # array of dtype('O'), U is for unicode string if ( array.dtype.char == "U" or serialize_lookup[array.dtype]["scalar_type"] == "VOID" @@ -286,7 +285,6 @@ def pack_array(self, array): for it in (self.pack_blob(e) for e in array.flatten(order="F")) ) self.set_dj0() # not supported by original mym - # array of dtype('c') elif serialize_lookup[array.dtype]["scalar_type"] == "CHAR": blob += ( array.view(np.uint8).astype(np.uint16).tobytes() diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 343820547..5264a2d32 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,6 +1,6 @@ -0.13.7 -- TBD --------------- -* Bugfix - Fix networkx incompatable change by version pinning to 2.8.3 PR #1036 (#1035) +0.13.7 -- Jul 13, 2022 +---------------------- +* Bugfix - Fix networkx incompatable change by version pinning to 2.6.3 PR #1036 (#1035) * Add - Support for serializing numpy datetime64 types PR #1036 (#1022) * Update - Add traceback to default logging PR #1036 diff --git a/tests/test_blob.py b/tests/test_blob.py index ce4cf3993..1d938ddb2 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -2,6 +2,7 @@ import timeit import numpy as np import uuid +from faker import Faker from . import schema from decimal import Decimal from datetime import datetime @@ -232,17 +233,17 @@ def test_datetime_serialization_speed(): # If this fails that means for some reason deserializing/serializing # np arrays of np.datetime64 types is now slower than regular arrays of datetime64 - np_array_dt_exe_time = timeit.timeit( - setup="myarr=pack(np.array([np.datetime64(f'{x}') for x in range(1900, 2000)]))", + optimized_exe_time = timeit.timeit( + setup="myarr=pack(np.array([np.datetime64('2022-10-13 03:03:13') for _ in range(0, 10000)]))", stmt="unpack(myarr)", number=10, globals=globals(), ) - python_array_dt_exe_time = timeit.timeit( - setup="myarr2=pack([datetime.now() for x in range (1900, 2000)])", + baseline_exe_time = timeit.timeit( + setup="myarr2=pack(np.array([datetime(2022,10,13,3,3,13) for _ in range (0, 10000)]))", stmt="unpack(myarr2)", number=10, globals=globals(), ) - assert np_array_dt_exe_time < python_array_dt_exe_time + assert optimized_exe_time * 1000 < baseline_exe_time From ef243e6afc04725b22fb6a1ec1b1a4b4374dafe1 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Tue, 12 Jul 2022 16:19:05 -0500 Subject: [PATCH 20/22] small changes. --- tests/test_blob.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_blob.py b/tests/test_blob.py index 1d938ddb2..849e24f55 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -239,11 +239,13 @@ def test_datetime_serialization_speed(): number=10, globals=globals(), ) + print(f"np time {optimized_exe_time}") baseline_exe_time = timeit.timeit( setup="myarr2=pack(np.array([datetime(2022,10,13,3,3,13) for _ in range (0, 10000)]))", stmt="unpack(myarr2)", number=10, globals=globals(), ) + print(f"python time {baseline_exe_time}") assert optimized_exe_time * 1000 < baseline_exe_time From 33b9d78a2cb1560723a5f273a6b864bb571ff5ed Mon Sep 17 00:00:00 2001 From: jverswijver Date: Tue, 12 Jul 2022 16:27:55 -0500 Subject: [PATCH 21/22] remove faker. --- tests/test_blob.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_blob.py b/tests/test_blob.py index 849e24f55..91b2ce131 100644 --- a/tests/test_blob.py +++ b/tests/test_blob.py @@ -2,7 +2,6 @@ import timeit import numpy as np import uuid -from faker import Faker from . import schema from decimal import Decimal from datetime import datetime From a37cb1e140b145e4ca76367c7179f262ff9d3350 Mon Sep 17 00:00:00 2001 From: jverswijver Date: Tue, 12 Jul 2022 16:37:08 -0500 Subject: [PATCH 22/22] change version --- datajoint/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/version.py b/datajoint/version.py index 00ba24b09..50c8b9c68 100644 --- a/datajoint/version.py +++ b/datajoint/version.py @@ -1,3 +1,3 @@ -__version__ = "0.13.6" +__version__ = "0.13.7" assert len(__version__) <= 10 # The log table limits version to the 10 characters