influxdb_client/client/write/point.py

"""Point data structure to represent LineProtocol."""

import math
import warnings
from builtins import int
from datetime import datetime, timedelta, timezone
from decimal import Decimal
from numbers import Integral

from influxdb_client.client.util.date_utils import get_date_helper
from influxdb_client.domain.write_precision import WritePrecision

EPOCH = datetime.fromtimestamp(0, tz=timezone.utc)

DEFAULT_WRITE_PRECISION = WritePrecision.NS

_ESCAPE_MEASUREMENT = str.maketrans({
    ',': r'\,',
    ' ': r'\ ',
    '\n': r'\n',
    '\t': r'\t',
    '\r': r'\r',
})

_ESCAPE_KEY = str.maketrans({
    ',': r'\,',
    '=': r'\=',
    ' ': r'\ ',
    '\n': r'\n',
    '\t': r'\t',
    '\r': r'\r',
})

_ESCAPE_STRING = str.maketrans({
    '"': r'\"',
    '\\': r'\\',
})

try:
    import numpy as np

    _HAS_NUMPY = True
except ModuleNotFoundError:
    _HAS_NUMPY = False


class Point(object):
    """
    Point defines the values that will be written to the database.

    Ref: https://docs.influxdata.com/influxdb/latest/reference/key-concepts/data-elements/#point
    """

    @staticmethod
    def measurement(measurement):
        """Create a new Point with specified measurement name."""
        p = Point(measurement)
        return p

    @staticmethod
    def from_dict(dictionary: dict, write_precision: WritePrecision = DEFAULT_WRITE_PRECISION, **kwargs):
        """
        Initialize point from 'dict' structure.

        The expected dict structure is:
            - measurement
            - tags
            - fields
            - time

        Example:
            .. code-block:: python

                # Use default dictionary structure
                dict_structure = {
                    "measurement": "h2o_feet",
                    "tags": {"location": "coyote_creek"},
                    "fields": {"water_level": 1.0},
                    "time": 1
                }
                point = Point.from_dict(dict_structure, WritePrecision.NS)

        Example:
            .. code-block:: python

                # Use custom dictionary structure
                dictionary = {
                    "name": "sensor_pt859",
                    "location": "warehouse_125",
                    "version": "2021.06.05.5874",
                    "pressure": 125,
                    "temperature": 10,
                    "created": 1632208639,
                }
                point = Point.from_dict(dictionary,
                                        write_precision=WritePrecision.S,
                                        record_measurement_key="name",
                                        record_time_key="created",
                                        record_tag_keys=["location", "version"],
                                        record_field_keys=["pressure", "temperature"])

        Int Types:
            The following example shows how to configure the types of integers fields.
            It is useful when you want to serialize integers always as ``float`` to avoid ``field type conflict``
            or use ``unsigned 64-bit integer`` as the type for serialization.

            .. code-block:: python

                # Use custom dictionary structure
                dict_structure = {
                    "measurement": "h2o_feet",
                    "tags": {"location": "coyote_creek"},
                    "fields": {
                        "water_level": 1.0,
                        "some_counter": 108913123234
                    },
                    "time": 1
                }

                point = Point.from_dict(dict_structure, field_types={"some_counter": "uint"})

        :param dictionary: dictionary for serialize into data Point
        :param write_precision: sets the precision for the supplied time values
        :key record_measurement_key: key of dictionary with specified measurement
        :key record_measurement_name: static measurement name for data Point
        :key record_time_key: key of dictionary with specified timestamp
        :key record_tag_keys: list of dictionary keys to use as a tag
        :key record_field_keys: list of dictionary keys to use as a field
        :key field_types: optional dictionary to specify types of serialized fields. Currently, is supported customization for integer types.
                          Possible integers types:
                            - ``int`` - serialize integers as "**Signed 64-bit integers**" - ``9223372036854775807i`` (default behaviour)
                            - ``uint`` - serialize integers as "**Unsigned 64-bit integers**" - ``9223372036854775807u``
                            - ``float`` - serialize integers as "**IEEE-754 64-bit floating-point numbers**". Useful for unify number types in your pipeline to avoid field type conflict - ``9223372036854775807``
                          The ``field_types`` can be also specified as part of incoming dictionary. For more info see an example above.
        :return: new data point
        """  # noqa: E501
        measurement_ = kwargs.get('record_measurement_name', None)
        if measurement_ is None:
            measurement_ = dictionary[kwargs.get('record_measurement_key', 'measurement')]
        point = Point(measurement_)

        record_tag_keys = kwargs.get('record_tag_keys', None)
        if record_tag_keys is not None:
            for tag_key in record_tag_keys:
                if tag_key in dictionary:
                    point.tag(tag_key, dictionary[tag_key])
        elif 'tags' in dictionary:
            for tag_key, tag_value in dictionary['tags'].items():
                point.tag(tag_key, tag_value)

        record_field_keys = kwargs.get('record_field_keys', None)
        if record_field_keys is not None:
            for field_key in record_field_keys:
                if field_key in dictionary:
                    point.field(field_key, dictionary[field_key])
        else:
            for field_key, field_value in dictionary['fields'].items():
                point.field(field_key, field_value)

        record_time_key = kwargs.get('record_time_key', 'time')
        if record_time_key in dictionary:
            point.time(dictionary[record_time_key], write_precision=write_precision)

        _field_types = kwargs.get('field_types', {})
        if 'field_types' in dictionary:
            _field_types = dictionary['field_types']
        # Map API fields types to Line Protocol types postfix:
        # - int: 'i'
        # - uint: 'u'
        # - float: ''
        point._field_types = dict(map(
            lambda item: (item[0], 'i' if item[1] == 'int' else 'u' if item[1] == 'uint' else ''),
            _field_types.items()
        ))

        return point

    def __init__(self, measurement_name):
        """Initialize defaults."""
        self._tags = {}
        self._fields = {}
        self._name = measurement_name
        self._time = None
        self._write_precision = DEFAULT_WRITE_PRECISION
        self._field_types = {}

    def time(self, time, write_precision=DEFAULT_WRITE_PRECISION):
        """
        Specify timestamp for DataPoint with declared precision.

        If time doesn't have specified timezone we assume that timezone is UTC.

        Examples::
            Point.measurement("h2o").field("val", 1).time("2009-11-10T23:00:00.123456Z")
            Point.measurement("h2o").field("val", 1).time(1257894000123456000)
            Point.measurement("h2o").field("val", 1).time(datetime(2009, 11, 10, 23, 0, 0, 123456))
            Point.measurement("h2o").field("val", 1).time(1257894000123456000, write_precision=WritePrecision.NS)


        :param time: the timestamp for your data
        :param write_precision: sets the precision for the supplied time values
        :return: this point
        """
        self._write_precision = write_precision
        self._time = time
        return self

    def tag(self, key, value):
        """Add tag with key and value."""
        self._tags[key] = value
        return self

    def field(self, field, value):
        """Add field with key and value."""
        self._fields[field] = value
        return self

    def to_line_protocol(self, precision=None):
        """
        Create LineProtocol.

         :param precision: required precision of LineProtocol. If it's not set then use the precision from ``Point``.
        """
        _measurement = _escape_key(self._name, _ESCAPE_MEASUREMENT)
        if _measurement.startswith("#"):
            message = f"""The measurement name '{_measurement}' start with '#'.

The output Line protocol will be interpret as a comment by InfluxDB. For more info see:
    - https://docs.influxdata.com/influxdb/latest/reference/syntax/line-protocol/#comments
"""
            warnings.warn(message, SyntaxWarning)
        _tags = _append_tags(self._tags)
        _fields = _append_fields(self._fields, self._field_types)
        if not _fields:
            return ""
        _time = _append_time(self._time, self._write_precision if precision is None else precision)

        return f"{_measurement}{_tags}{_fields}{_time}"

    @property
    def write_precision(self):
        """Get precision."""
        return self._write_precision

    @classmethod
    def set_str_rep(cls, rep_function):
        """Set the string representation for all Points."""
        cls.__str___rep = rep_function

    def __str__(self):
        """Create string representation of this Point."""
        return self.to_line_protocol()

    def __eq__(self, other):
        """Return true iff other is equal to self."""
        if not isinstance(other, Point):
            return False
        # assume points are equal iff their instance fields are equal
        return (self._tags == other._tags and
                self._fields == other._fields and
                self._name == other._name and
                self._time == other._time and
                self._write_precision == other._write_precision and
                self._field_types == other._field_types)


def _append_tags(tags):
    _return = []
    for tag_key, tag_value in sorted(tags.items()):

        if tag_value is None:
            continue

        tag = _escape_key(tag_key)
        value = _escape_tag_value(tag_value)
        if tag != '' and value != '':
            _return.append(f'{tag}={value}')

    return f"{',' if _return else ''}{','.join(_return)} "


def _append_fields(fields, field_types):
    _return = []

    for field, value in sorted(fields.items()):
        if value is None:
            continue

        if isinstance(value, float) or isinstance(value, Decimal) or _np_is_subtype(value, 'float'):
            if not math.isfinite(value):
                continue
            s = str(value)
            # It's common to represent whole numbers as floats
            # and the trailing ".0" that Python produces is unnecessary
            # in line-protocol, inconsistent with other line-protocol encoders,
            # and takes more space than needed, so trim it off.
            if s.endswith('.0'):
                s = s[:-2]
            _return.append(f'{_escape_key(field)}={s}')
        elif (isinstance(value, int) or _np_is_subtype(value, 'int')) and not isinstance(value, bool):
            _type = field_types.get(field, "i")
            _return.append(f'{_escape_key(field)}={str(value)}{_type}')
        elif isinstance(value, bool):
            _return.append(f'{_escape_key(field)}={str(value).lower()}')
        elif isinstance(value, str):
            _return.append(f'{_escape_key(field)}="{_escape_string(value)}"')
        else:
            raise ValueError(f'Type: "{type(value)}" of field: "{field}" is not supported.')

    return f"{','.join(_return)}"


def _append_time(time, write_precision) -> str:
    if time is None:
        return ''
    return f" {int(_convert_timestamp(time, write_precision))}"


def _escape_key(tag, escape_list=None) -> str:
    if escape_list is None:
        escape_list = _ESCAPE_KEY
    return str(tag).translate(escape_list)


def _escape_tag_value(value) -> str:
    ret = _escape_key(value)
    if ret.endswith('\\'):
        ret += ' '
    return ret


def _escape_string(value) -> str:
    return str(value).translate(_ESCAPE_STRING)


def _convert_timestamp(timestamp, precision=DEFAULT_WRITE_PRECISION):
    date_helper = get_date_helper()
    if isinstance(timestamp, Integral):
        return timestamp  # assume precision is correct if timestamp is int

    if isinstance(timestamp, str):
        timestamp = date_helper.parse_date(timestamp)

    if isinstance(timestamp, timedelta) or isinstance(timestamp, datetime):

        if isinstance(timestamp, datetime):
            timestamp = date_helper.to_utc(timestamp) - EPOCH

        ns = date_helper.to_nanoseconds(timestamp)

        if precision is None or precision == WritePrecision.NS:
            return ns
        elif precision == WritePrecision.US:
            return ns / 1e3
        elif precision == WritePrecision.MS:
            return ns / 1e6
        elif precision == WritePrecision.S:
            return ns / 1e9

    raise ValueError(timestamp)


def _np_is_subtype(value, np_type):
    if not _HAS_NUMPY or not hasattr(value, 'dtype'):
        return False

    if np_type == 'float':
        return np.issubdtype(value, np.floating)
    elif np_type == 'int':
        return np.issubdtype(value, np.integer)
    return False