diff --git a/btrdb/conn.py b/btrdb/conn.py index 06fbb3c..6a0c4a7 100644 --- a/btrdb/conn.py +++ b/btrdb/conn.py @@ -404,6 +404,67 @@ def list_collections(self, starts_with=""): """ return [c for some in self.ep.listCollections(starts_with) for c in some] + def _list_unique_tags_annotations(self, key, collection): + """ + Returns a SQL statement and parameters to get list of tags or annotations. + """ + if key == "annotations": + query = "select distinct({}) as {} from streams".format( + "skeys(annotations)", "annotations" + ) + else: + query = "select distinct({}) as {} from streams".format(key, key) + params = [] + if isinstance(collection, str): + params.append("{}%".format(collection)) + query = " where ".join([query, """collection like $1"""]) + return [metadata[key] for metadata in self.query(query, params)] + + def list_unique_annotations(self, collection=None): + """ + Returns a list of annotation keys used in a given collection prefix. + + Parameters + ------- + collection: str + Prefix of the collection to filter. + + Returns + ------- + annotations: list[str] + """ + return self._list_unique_tags_annotations("annotations", collection) + + def list_unique_names(self, collection=None): + """ + Returns a list of names used in a given collection prefix. + + Parameters + ------- + collection: str + Prefix of the collection to filter. + + Returns + ------- + names: list[str] + """ + return self._list_unique_tags_annotations("name", collection) + + def list_unique_units(self, collection=None): + """ + Returns a list of units used in a given collection prefix. + + Parameters + ------- + collection: str + Prefix of the collection to filter. + + Returns + ------- + units: list[str] + """ + return self._list_unique_tags_annotations("unit", collection) + @retry def streams_in_collection( self, diff --git a/btrdb/stream.py b/btrdb/stream.py index 0eed275..b302af4 100644 --- a/btrdb/stream.py +++ b/btrdb/stream.py @@ -10,7 +10,6 @@ """ Module for Stream and related classes """ -import io ########################################################################## ## Imports @@ -117,7 +116,7 @@ def refresh_metadata(self): ) = ep.streamInfo(self._uuid, False, True) self._known_to_exist = True - # deserialize annoation values + # deserialize annotation values self._annotations = { key: json.loads(val, cls=AnnotationDecoder) for key, val in self._annotations.items() @@ -323,6 +322,17 @@ def earliest( version : int, default: 0 Specify the version of the stream to query; if zero, queries the latest stream state rather than pinning to a version. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -352,6 +362,17 @@ def latest( version : int, default: 0 Specify the version of the stream to query; if zero, queries the latest stream state rather than pinning to a version. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -382,6 +403,17 @@ def current( version : int, default: 0 Specify the version of the stream to query; if zero, queries the latest stream state rather than pinning to a version. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -409,9 +441,20 @@ def tags( Parameters ---------- - refresh: bool + refresh: bool, default: False Indicates whether a round trip to the server should be implemented regardless of whether there is a local copy. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -443,9 +486,20 @@ def annotations( Parameters ---------- - refresh: bool + refresh: bool, default: False Indicates whether a round trip to the server should be implemented regardless of whether there is a local copy. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -476,7 +530,17 @@ def version( Parameters ---------- - None + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -549,6 +613,9 @@ def arrow_insert(self, data: pa.Table, merge: str = "never") -> int: int The version of the stream after inserting new points. + Notes + ----- + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not self._btrdb._ARROW_ENABLED: raise NotImplementedError(_arrow_not_impl_str.format("arrow_insert")) @@ -674,6 +741,17 @@ def update( Replace all annotations or tags with the specified dictionaries instead of performing the normal upsert operation. Specifying True is the only way to remove annotation keys. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -732,6 +810,17 @@ def delete( end : int or datetime like object The end time in nanoseconds for the range to be deleted. (see :func:`btrdb.utils.timez.to_nanoseconds` for valid input types) + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -771,6 +860,17 @@ def values( :func:`btrdb.utils.timez.to_nanoseconds` for valid input types) version: int The version of the stream to be queried + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------ @@ -822,6 +922,17 @@ def arrow_values( :func:`btrdb.utils.timez.to_nanoseconds` for valid input types) version: int The version of the stream to be queried + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------ @@ -836,6 +947,8 @@ def arrow_values( from a sensor). This is the lowest level of data with the finest time granularity. In the tree data structure of BTrDB, this data is stored in the vector nodes. + + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not self._btrdb._ARROW_ENABLED: raise NotImplementedError(_arrow_not_impl_str.format("arrow_values")) @@ -898,6 +1011,17 @@ def aligned_windows( Specify the number of ns between data points (2**pointwidth) version : int Version of the stream to query + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -952,16 +1076,27 @@ def arrow_aligned_windows( Parameters ---------- - start : int or datetime like object + start : int or datetime like object, required The start time in nanoseconds for the range to be queried. (see :func:`btrdb.utils.timez.to_nanoseconds` for valid input types) - end : int or datetime like object + end : int or datetime like object, required The end time in nanoseconds for the range to be queried. (see :func:`btrdb.utils.timez.to_nanoseconds` for valid input types) - pointwidth : int + pointwidth : int, required Specify the number of ns between data points (2**pointwidth) version : int Version of the stream to query + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -972,6 +1107,8 @@ def arrow_aligned_windows( ----- As the window-width is a power-of-two, it aligns with BTrDB internal tree data structure and is faster to execute than `windows()`. + + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not self._btrdb._ARROW_ENABLED: raise NotImplementedError( @@ -1032,6 +1169,17 @@ def windows( The number of nanoseconds in each window. version : int The version of the stream to query. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -1054,6 +1202,7 @@ def windows( parameter previously available has been deprecated. The only valid value for depth is now 0. + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ materialized = [] start = to_nanoseconds(start) @@ -1091,6 +1240,17 @@ def arrow_windows( The number of nanoseconds in each window. version : int, default=0, optional The version of the stream to query. + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -1110,6 +1270,7 @@ def arrow_windows( end = start + width * floordiv(end - start, width). The `depth` parameter previously available has been deprecated. The only valid value for depth is now 0. + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not self._btrdb._ARROW_ENABLED: raise NotImplementedError(_arrow_not_impl_str.format("arrow_windows")) @@ -1170,6 +1331,17 @@ def nearest( Version of the stream to use in search backward : boolean True to search backwards from time, else false for forward + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False Returns ------- @@ -1207,11 +1379,24 @@ def obliterate( Obliterate deletes a stream from the BTrDB server. An exception will be raised if the stream could not be found. + Parameters + ---------- + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False + Raises ------ BTrDBError [404] stream does not exist The stream could not be found in BTrDB - """ self._btrdb.ep.obliterate(self._uuid) @@ -1226,6 +1411,19 @@ def flush( """ Flush writes the stream buffers out to persistent storage. + Parameters + ---------- + auto_retry: bool, default: False + Whether to retry this request in the event of an error + retries: int, default: 5 + Number of times to retry this request if there is an error. Will + be ignored if auto_retry is False + retry_delay: int, default: 3 + initial time to wait before retrying function call if there is an error. + Will be ignored if auto_retry is False + retry_backoff: int, default: 4 + Exponential factor by which the backoff increases between retries. + Will be ignored if auto_retry is False """ self._btrdb.ep.flush(self._uuid) @@ -1848,6 +2046,7 @@ def arrow_insert(self, data_map: dict, merge: str = "never") -> dict: Notes ----- BTrDB expects datetimes to be in UTC+0. + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL Returns ------- @@ -1895,7 +2094,12 @@ def values(self): def arrow_values( self, ): - """Return a pyarrow table of stream values based on the streamset parameters.""" + """Return a pyarrow table of stream values based on the streamset parameters. + + Notes + ----- + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL + """ params = self._params_from_filters() versions = self._pinned_versions if versions is None: diff --git a/btrdb/transformers.py b/btrdb/transformers.py index b2aaa28..4acbc40 100644 --- a/btrdb/transformers.py +++ b/btrdb/transformers.py @@ -117,6 +117,10 @@ def arrow_to_series(streamset, agg="mean", name_callable=None): name_callable : lambda, default: lambda s: s.collection + "/" + s.name Specify a callable that can be used to determine the series name given a Stream object. + + Notes + ----- + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not streamset._btrdb._ARROW_ENABLED: raise NotImplementedError( @@ -153,7 +157,11 @@ def arrow_to_dataframe( name_callable : lambda, default: lambda s: s.collection + "/" + s.name Specify a callable that can be used to determine the series name given a - Stream object. This is not compatible with agg == "all" at this time + Stream object. + + Notes + ----- + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not streamset._btrdb._ARROW_ENABLED: raise NotImplementedError( @@ -306,7 +314,7 @@ def arrow_to_polars(streamset, agg=None, name_callable=None): Notes ----- - This requires a BTrDB server that has arrow support enabled. + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not streamset._btrdb._ARROW_ENABLED: raise NotImplementedError( @@ -329,6 +337,12 @@ def arrow_to_polars(streamset, agg=None, name_callable=None): def arrow_to_arrow_table(streamset): + """Return a pyarrow table of data. + + Notes + ----- + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL + """ if not streamset._btrdb._ARROW_ENABLED: raise NotImplementedError( "arrow_to_arrow_table requires an arrow-enabled BTrDB server." @@ -443,7 +457,7 @@ def arrow_to_numpy(streamset, agg=None): ----- This method first converts to a pandas data frame then to a numpy array. - This method requires the btrdb server to have arrow support enabled. + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not streamset._btrdb._ARROW_ENABLED: raise NotImplementedError( @@ -513,6 +527,9 @@ def arrow_to_dict(streamset, agg=None, name_callable=None): Specify a callable that can be used to determine the series name given a Stream object. + Notes + ----- + ARROW ENABLED SERVERS REQUIRED - CHANGE ME FOR FINAL """ if not streamset._btrdb._ARROW_ENABLED: raise NotImplementedError( diff --git a/docs/requirements.txt b/docs/requirements.txt index 4da8205..a3bb1f5 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,3 @@ alabaster>=0.7.12 Sphinx>=1.7 +sphinx-rtd-theme diff --git a/docs/source/api/transformers.rst b/docs/source/api/transformers.rst index 289a1ea..facbbc4 100644 --- a/docs/source/api/transformers.rst +++ b/docs/source/api/transformers.rst @@ -1,17 +1,30 @@ btrdb.transformers ========================= -A number of tranformation and serialization functions have been developed so +A number of transformation and serialization functions have been developed so you can use the data in the format of your choice. These functions are provided -in the :code:`btrdb.utils.transformers` module but are also available directly -off the the :code:`StreamSet` class. +in the :code:`StreamSet` class. .. automodule:: btrdb.transformers +.. autofunction:: arrow_to_dict + +.. autofunction:: arrow_to_numpy + +.. autofunction:: arrow_to_series + +.. autofunction:: arrow_to_dataframe + +.. autofunction:: arrow_to_polars + +.. autofunction:: arrow_to_arrow_table + .. autofunction:: to_dict .. autofunction:: to_array +.. autofunction:: to_polars + .. autofunction:: to_series .. autofunction:: to_dataframe diff --git a/docs/source/conf.py b/docs/source/conf.py index 9801730..a2ce391 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -51,6 +51,7 @@ "sphinx.ext.todo", "sphinx.ext.githubpages", "sphinx.ext.intersphinx", + "numpydoc", ] # Add any paths that contain templates here, relative to this directory. @@ -70,7 +71,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = "en" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -86,7 +87,8 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "alabaster" +# html_theme = "alabaster" +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the