Merge branch 'release-v5.5.3'

BTrDB · Aug 1, 2019 · 0e5061e · 0e5061e
2 parents c4a8c72 + 2657527
commit 0e5061e
Show file tree

Hide file tree

Showing 14 changed files with 484 additions and 63 deletions.
diff --git a/btrdb/stream.py b/btrdb/stream.py
@@ -21,12 +21,12 @@
 from copy import deepcopy
 from collections.abc import Sequence
 
+from btrdb.utils.buffer import PointBuffer
 from btrdb.point import RawPoint, StatPoint
 from btrdb.transformers import StreamSetTransformer
-from btrdb.utils.buffer import PointBuffer
-from btrdb.utils.timez import currently_as_ns, to_nanoseconds
-from btrdb.utils.conversion import AnnotationEncoder
 from btrdb.exceptions import BTrDBError, InvalidOperation
+from btrdb.utils.timez import currently_as_ns, to_nanoseconds
+from btrdb.utils.conversion import AnnotationEncoder, AnnotationDecoder
 
 
 ##########################################################################
@@ -39,7 +39,7 @@
 
 try:
     RE_PATTERN = re._pattern_type
-except:
+except Exception:
     RE_PATTERN = re.Pattern
 
 
@@ -74,29 +74,23 @@ def __init__(self, btrdb, uuid, **db_values):
         self._btrdb = btrdb
         self._uuid = uuid
 
-
     def refresh_metadata(self):
         """
         Refreshes the locally cached meta data for a stream
 
         Queries the BTrDB server for all stream metadata including collection,
         annotation, and tags. This method requires a round trip to the server.
-
         """
 
         ep = self._btrdb.ep
         self._collection, self._property_version, self._tags, self._annotations, _ = ep.streamInfo(self._uuid, False, True)
         self._known_to_exist = True
 
         # deserialize annotation values
-        parts = []
-        for k, v in self._annotations.items():
-            try:
-                parts.append([k, json.loads(v)])
-            except json.decoder.JSONDecodeError:
-                parts.append([k, v])
-
-        self._annotations = dict(parts)
+        self._annotations = {
+            key: json.loads(val, cls=AnnotationDecoder)
+            for key, val in self._annotations.items()
+        }
 
     def exists(self):
         """
@@ -128,6 +122,42 @@ def exists(self):
                 return False
             raise bte
 
+    def count(self, start=MINIMUM_TIME, end=MAXIMUM_TIME, pointwidth=62, version=0):
+        """
+        Return how many points the stream holds within a time window
+
+        The result is obtained by adding up the ``count`` attribute of every
+        StatPoint returned by ``aligned_windows`` for the requested window and
+        version. With the default arguments this is the latest total number
+        of points in the stream. Because ``aligned_windows`` is used, the
+        start and end timestamps may be adjusted if they are not powers of 2.
+        For short windows of time a smaller pointwidth may be needed so that
+        the count granularity is captured appropriately.
+
+        Parameters
+        ----------
+        start : int or datetime like object, default: MINIMUM_TIME
+            Beginning of the queried range in nanoseconds. (see
+            :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
+
+        end : int or datetime like object, default: MAXIMUM_TIME
+            End of the queried range in nanoseconds. (see
+            :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
+
+        pointwidth : int, default: 62
+            Width of each aligned window, expressed as a power of two in
+            nanoseconds (2**pointwidth)
+
+        version : int, default: 0
+            Version of the stream to query
+
+        Returns
+        -------
+        int
+            The number of points found in the stream for the given window.
+        """
+        windows = self.aligned_windows(start, end, pointwidth, version)
+        # each item is a (StatPoint, version) pair; only the StatPoint is needed
+        return sum(stat.count for stat, _ in windows)
+
+
     @property
     def btrdb(self):
         """
@@ -396,9 +426,14 @@ def _update_tags_collection(self, tags, collection):
         )
 
     def _update_annotations(self, annotations, encoder):
-        serialized = dict(
-            [[k, json.dumps(v, cls=encoder)] for k, v in annotations.items()]
-        )
+        # make a copy of the annotations to prevent accidental mutable object mutation
+        serialized = deepcopy(annotations)
+        if encoder is not None:
+            serialized = {
+                k: json.dumps(v, cls=encoder, indent=None, allow_nan=True)
+                for k, v in serialized.items()
+            }
+
         self._btrdb.ep.setStreamAnnotations(
             uu=self.uuid,
             expected=self._property_version,
@@ -417,8 +452,9 @@ def update(self, tags=None, annotations=None, collection=None, encoder=Annotatio
             dict of annotation information for the stream.
         collection: str
             The collection prefix for a stream
-        encoder: json.JSONEncoder
-            JSON encoder to class to use for annotation serializations
+        encoder: json.JSONEncoder or None
+            JSON encoder class to use for annotation serializations, set to
+            None to prevent JSON encoding of the annotations.
 
         Returns
         -------
@@ -772,6 +808,44 @@ def versions(self):
         """
         return self._pinned_versions if self._pinned_versions else self._latest_versions()
 
+    def count(self):
+        """
+        Return the combined number of points across all streams in the set.
+
+        Each stream is counted with ``Stream.count`` and the per-stream
+        results are added together. By default this is the latest total count
+        of all points in the streams; start and end filters and pinned
+        versions change what is counted.
+
+        Note that the per-stream counts come from the StatPoints returned by
+        ``aligned_windows``. Because of this the start and end timestamps
+        may be adjusted if they are not powers of 2. For smaller windows of
+        time, set the pointwidth property so that the count granularity is
+        captured appropriately.
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        int
+            The total number of points in all streams for the active filters.
+        """
+        filters = self._params_from_filters()
+        start = filters.get("start", MINIMUM_TIME)
+        end = filters.get("end", MAXIMUM_TIME)
+
+        # fall back to the widest window when no pointwidth has been set
+        pointwidth = 62 if self.pointwidth is None else self.pointwidth
+        pinned = self._pinned_versions or {}
+
+        # streams without a pinned version are queried at version 0
+        return sum(
+            stream.count(start, end, pointwidth, pinned.get(stream.uuid, 0))
+            for stream in self._streams
+        )
+
+
     def earliest(self):
         """
         Returns earliest points of data in streams using available filters.

diff --git a/btrdb/utils/conversion.py b/btrdb/utils/conversion.py
@@ -19,8 +19,18 @@
 
 import uuid
 import json
+import pytz
+
 from datetime import datetime
 
+try:
+    import numpy as np
+except ImportError:
+    np = None
+
+
+RFC3339 = "%Y-%m-%d %H:%M:%S.%f%z"
+
 
 ##########################################################################
 ## Classes
@@ -30,23 +40,44 @@ class AnnotationEncoder(json.JSONEncoder):
     """Default JSON encoder class for saving stream annotations"""
 
     def default(self, obj):
-        RFC3339 = "%Y-%m-%d %H:%M:%S.%f%z"
+        """Handle complex and user-specific types"""
+        # handle UUID objects
+        if isinstance(obj, uuid.UUID):
+            return str(obj)
 
         # handle Python datetime
+        # TODO: better handling for timezone naive datetimes
         if isinstance(obj, datetime):
             return obj.strftime(RFC3339)
 
         # handle numpy datetime64
-        try:
-            import numpy as np
-            if isinstance(obj, np.datetime64):
-                return obj.astype(datetime).strftime(RFC3339)
-        except ImportError:
-            pass
+        if np is not None and isinstance(obj, np.datetime64):
+            # We assume that np.datetime64 values are in UTC because datetime64
+            # is always timezone naive and carries no offset of its own; see
+            # https://numpy.org/devdocs/reference/arrays.datetime.html#changes-with-numpy-1-11
+            return pytz.utc.localize(obj.astype(datetime)).strftime(RFC3339)
 
         # Let the base class default method raise the TypeError
         return json.JSONEncoder.default(self, obj)
 
+    def encode(self, obj):
+        """
+        Serialize obj to JSON, leaving plain string values unquoted
+
+        When the JSON text produced by the base encoder is a string literal
+        (it begins and ends with a double quote), the enclosing pair of
+        quotes is removed. All other values are returned exactly as the base
+        encoder produced them.
+        """
+        serialized = super(AnnotationEncoder, self).encode(obj)
+        # Remove exactly one quote from each side. str.strip('"') would also
+        # consume the quote of a trailing escaped quote (e.g. 'say "hi"'),
+        # truncating the value and leaving a dangling backslash.
+        if len(serialized) >= 2 and serialized.startswith('"') and serialized.endswith('"'):
+            serialized = serialized[1:-1]
+        return serialized
+
+
+class AnnotationDecoder(json.JSONDecoder):
+    """JSON decoder for stream annotations that tolerates non-JSON values"""
+
+    def decode(self, s):
+        """Parse s as JSON; if it is not valid JSON, return s unchanged"""
+        try:
+            decoded = super().decode(s)
+        except json.JSONDecodeError:
+            return s
+        return decoded
+
 
 ##########################################################################
 ## Functions

diff --git a/btrdb/utils/timez.py b/btrdb/utils/timez.py
@@ -30,16 +30,18 @@
 ##########################################################################
 
 DATETIME_FORMATS = (
-	"%Y-%m-%d %H:%M:%S.%f%z", # most common RFC3339 nanoseconds
-	"%Y-%m-%d %H:%M:%S.%f",   # expects UTC default timezone
-	"%Y-%m-%dT%H:%M:%S.%fZ",  # JSON encoding, UTC timezone
-	"%Y-%m-%dT%H:%M:%SZ",	  # JSON encoding, UTC timezone
-	"%Y-%m-%dT%H:%M:%S.%f%z", # less common JSON-ish encoding
-	"%Y-%m-%dT%H:%M:%S.%f",   # for completeness, UTC default timezone
-	"%Y-%m-%d %H:%M:%S%z",	  # human readable date time with TZ
-	"%Y-%m-%d %H:%M:%S",	  # human readable date time UTC default
+    "%Y-%m-%d %H:%M:%S.%f%z",  # most common RFC3339 nanoseconds
+    "%Y-%m-%d %H:%M:%S.%f",    # expects UTC default timezone
+    "%Y-%m-%dT%H:%M:%S.%fZ",   # JSON encoding, UTC timezone
+    "%Y-%m-%dT%H:%M:%SZ",	   # JSON encoding, UTC timezone
+    "%Y-%m-%dT%H:%M:%S.%f%z",  # less common JSON-ish encoding
+    "%Y-%m-%dT%H:%M:%S.%f",    # for completeness, UTC default timezone
+    "%Y-%m-%d %H:%M:%S%z",	   # human readable date time with TZ
+    "%Y-%m-%d %H:%M:%S",	   # human readable date time UTC default
+    "%Y-%m-%d",                # helper to get midnight on a particular date
 )
 
+
 ##########################################################################
 ## Functions
 ##########################################################################
@@ -92,6 +94,7 @@ def datetime_to_ns(dt):
     dt_utc = aware.astimezone(pytz.utc)
     return int(dt_utc.timestamp() * 1e9)
 
+
 def to_nanoseconds(val):
     """
     Converts datetime, datetime64, float, str (RFC 3339) to nanoseconds.  If a
@@ -129,6 +132,8 @@ def to_nanoseconds(val):
     +--------------------------------+------------------------------------------+
     | %Y-%m-%d %H:%M:%S              | human readable date time UTC default     |
     +--------------------------------+------------------------------------------+
+    | %Y-%m-%d                       | midnight at a particular date            |
+    +--------------------------------+------------------------------------------+
 
     """
     if val is None or isinstance(val, int):

diff --git a/btrdb/version.py b/btrdb/version.py
@@ -18,9 +18,9 @@
 __version_info__ = {
     'major': 5,
     'minor': 5,
-    'micro': 2,
+    'micro': 3,
     'releaselevel': 'final',
-    'serial': 10,
+    'serial': 11,
 }
 
 ##########################################################################

diff --git a/...g/images/multiprocessing_architecture.png → .../figures/multiprocessing_architecture.png b/...g/images/multiprocessing_architecture.png → .../figures/multiprocessing_architecture.png
diff --git a/docs/source/_static/figures/ui_zoom.gif b/docs/source/_static/figures/ui_zoom.gif
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -209,3 +209,5 @@
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
+
+numfig = True
Original file line number	Diff line number	Diff line change
Expand Up		@@ -209,3 +209,5 @@

		# If true, `todo` and `todoList` produce output, else they produce nothing.
		todo_include_todos = True

		numfig = True