From 9ea5ac8bf40d2b58bfd5b335f20c8cdc53942034 Mon Sep 17 00:00:00 2001 From: Tim Visher Date: Mon, 22 Oct 2018 17:41:39 +0000 Subject: [PATCH] Deprecate `utils.strptime` Motivation ---------- This appears to have been deprecated for some time and is poorly behaved in the case of fractional seconds. `utils.strptime_to_utc` is preferred anyway. See https://github.com/singer-io/singer-python/issues/81 Implementation Notes -------------------- - Move to circle 2.0 - Add doctest to the nose runner - Add a docstring and warn call to utils.strptime --- .circleci/config.yml | 26 ++++++++++++++++++++ Makefile | 4 +-- bin/circle_deps | 13 ---------- bin/circle_test | 9 ------- circle.yml | 7 ------ setup.py | 2 ++ singer/messages.py | 58 ++++++++++++++++++++++---------------------- singer/metrics.py | 40 +++++++++++++++--------------- singer/utils.py | 35 +++++++++++++++++++++++--- tests/test_utils.py | 11 +++++++-- 10 files changed, 119 insertions(+), 86 deletions(-) create mode 100644 .circleci/config.yml delete mode 100755 bin/circle_deps delete mode 100755 bin/circle_test delete mode 100644 circle.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..eda5d4c --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,26 @@ +version: 2 +jobs: + build: + docker: + - image: ubuntu:16.04 + steps: + - checkout + - run: + name: 'Install python 3.5.2' + command: | + apt update + apt install --yes python3 python3-pip python3-venv + - run: + name: 'Setup virtualenv' + command: | + mkdir -p ~/.virtualenvs + python3 -m venv ~/.virtualenvs/singer-python + source ~/.virtualenvs/singer-python/bin/activate + pip install -U pip setuptools + make install + - run: + name: 'Run tests' + command: | + # Need to re-activate the virtualenv + source ~/.virtualenvs/singer-python/bin/activate + make test diff --git a/Makefile b/Makefile index f2d9f39..296c20f 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,11 @@ check_prereqs: bash -c '[[ -n $$VIRTUAL_ENV ]]' - bash -c '[[ $$(python3 --version) == *3.4.3* ]]' + bash -c '[[ $$(python3 --version) == *3.5.2* ]]' install: check_prereqs python3 -m pip install -e '.[dev]' test: install pylint singer -d missing-docstring,broad-except,bare-except,too-many-return-statements,too-many-branches,too-many-arguments,no-else-return,too-few-public-methods,fixme,protected-access - nosetests -v + nosetests --with-doctest -v diff --git a/bin/circle_deps b/bin/circle_deps deleted file mode 100755 index 4aa9771..0000000 --- a/bin/circle_deps +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -mkdir -p ~/.virtualenvs/singer-python -pyenv global 3.4.3 -python3 -m venv ~/.virtualenvs/singer-python -source ~/.virtualenvs/singer-python/bin/activate -pip install -U pip -set -x -echo $VIRTUAL_ENV -python3 --version -pip -V -set +x -make install diff --git a/bin/circle_test b/bin/circle_test deleted file mode 100755 index cd7bc0a..0000000 --- a/bin/circle_test +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash - -source ~/.virtualenvs/singer-python/bin/activate - -python --version -pip -V -pip freeze - -make diff --git a/circle.yml b/circle.yml deleted file mode 100644 index c84254b..0000000 --- a/circle.yml +++ /dev/null @@ -1,7 +0,0 @@ -dependencies: - override: - - bin/circle_deps - -test: - override: - - bin/circle_test diff --git a/setup.py b/setup.py index 0c19c67..e41e106 100755 --- a/setup.py +++ b/setup.py @@ -19,6 +19,8 @@ extras_require={ 'dev': [ 'pylint', + 'ipython', + 'ipdb', 'nose', 'singer-tools' ] diff --git a/singer/messages.py b/singer/messages.py index 8c15e04..0602cf5 100644 --- a/singer/messages.py +++ b/singer/messages.py @@ -35,9 +35,9 @@ class RecordMessage(Message): number. Note that this feature is experimental and most Taps and Targets should not need to use versioned streams. - >>> msg = singer.RecordMessage( - >>> stream='users', - >>> record={'id': 1, 'name': 'Mary'}) + msg = singer.RecordMessage( + stream='users', + record={'id': 1, 'name': 'Mary'}) ''' @@ -76,15 +76,15 @@ class SchemaMessage(Message): * schema (dict) - The JSON schema. * key_properties (list of strings) - List of primary key properties. - >>> msg = singer.SchemaMessage( - >>> stream='users', - >>> schema={'type': 'object', - >>> 'properties': { - >>> 'id': {'type': 'integer'}, - >>> 'name': {'type': 'string'} - >>> } - >>> }, - >>> key_properties=['id']) + msg = singer.SchemaMessage( + stream='users', + schema={'type': 'object', + 'properties': { + 'id': {'type': 'integer'}, + 'name': {'type': 'string'} + } + }, + key_properties=['id']) ''' def __init__(self, stream, schema, key_properties, bookmark_properties=None): @@ -118,8 +118,8 @@ class StateMessage(Message): * value (dict) - The value of the state. - >>> msg = singer.StateMessage( - >>> value={'users': '2017-06-19T00:00:00'}) + msg = singer.StateMessage( + value={'users': '2017-06-19T00:00:00'}) ''' def __init__(self, value): @@ -148,9 +148,9 @@ class ActivateVersionMessage(Message): not need to use the "version" field of "RECORD" messages or the "ACTIVATE_VERSION" message at all. - >>> msg = singer.ActivateVersionMessage( - >>> stream='users', - >>> version=2) + msg = singer.ActivateVersionMessage( + stream='users', + version=2) ''' def __init__(self, stream, version): @@ -221,7 +221,7 @@ def write_message(message): def write_record(stream_name, record, stream_alias=None, time_extracted=None): """Write a single record for the given stream. - >>> write_record("users", {"id": 2, "email": "mike@stitchdata.com"}) + write_record("users", {"id": 2, "email": "mike@stitchdata.com"}) """ write_message(RecordMessage(stream=(stream_alias or stream_name), record=record, @@ -231,9 +231,9 @@ def write_record(stream_name, record, stream_alias=None, time_extracted=None): def write_records(stream_name, records): """Write a list of records for the given stream. - >>> chris = {"id": 1, "email": "chris@stitchdata.com"} - >>> mike = {"id": 2, "email": "mike@stitchdata.com"} - >>> write_records("users", [chris, mike]) + chris = {"id": 1, "email": "chris@stitchdata.com"} + mike = {"id": 2, "email": "mike@stitchdata.com"} + write_records("users", [chris, mike]) """ for record in records: write_record(stream_name, record) @@ -242,10 +242,10 @@ def write_records(stream_name, records): def write_schema(stream_name, schema, key_properties, bookmark_properties=None, stream_alias=None): """Write a schema message. - >>> stream = 'test' - >>> schema = {'properties': {'id': {'type': 'integer'}, 'email': {'type': 'string'}}} # nopep8 - >>> key_properties = ['id'] - >>> write_schema(stream, schema, key_properties) + stream = 'test' + schema = {'properties': {'id': {'type': 'integer'}, 'email': {'type': 'string'}}} # nopep8 + key_properties = ['id'] + write_schema(stream, schema, key_properties) """ if isinstance(key_properties, (str, bytes)): key_properties = [key_properties] @@ -263,7 +263,7 @@ def write_schema(stream_name, schema, key_properties, bookmark_properties=None, def write_state(value): """Write a state message. - >>> write_state({'last_updated_at': '2017-02-14T09:21:00'}) + write_state({'last_updated_at': '2017-02-14T09:21:00'}) """ write_message(StateMessage(value=value)) @@ -271,8 +271,8 @@ def write_state(value): def write_version(stream_name, version): """Write an activate version message. - >>> stream = 'test' - >>> version = int(time.time()) - >>> write_version(stream, version) + stream = 'test' + version = int(time.time()) + write_version(stream, version) """ write_message(ActivateVersionMessage(stream_name, version)) diff --git a/singer/metrics.py b/singer/metrics.py index 911943a..db32a41 100644 --- a/singer/metrics.py +++ b/singer/metrics.py @@ -10,10 +10,10 @@ since the last time it reported. For example, to increment a record count for records from a "users" endpoint, you could do: - >>> with Counter('record_count', {'endpoint': 'users'}) as counter: - >>> for record in my_records: - >>> # Do stuff... - >>> counter.increment() + with Counter('record_count', {'endpoint': 'users'}) as counter: + for record in my_records: + # Do stuff... + counter.increment() Timer is class that allows you to track the timing of operations. Like Counter, you initialize it as a context manager, with a metric name and a @@ -22,8 +22,8 @@ automatically include a tag called "status" that is set to "failed" if an Exception was raised, or "succeeded" otherwise. - >>> with Timer('http_request_duration', {'endpoint': 'users'}): - >>> # Make a request, do some things + with Timer('http_request_duration', {'endpoint': 'users'}): + # Make a request, do some things In order to encourage consistent metric and tag names, this module provides several functions for creating Counters and Timers for very @@ -95,10 +95,10 @@ class Counter(): exits. The only thing you need to do is initialize the Counter and then call increment(). - >>> with singer.metrics.Counter('record_count', {'endpoint': 'users'}) as counter: - >>> for user in get_users(...): - >>> # Print out the user - >>> counter.increment() + with singer.metrics.Counter('record_count', {'endpoint': 'users'}) as counter: + for user in get_users(...): + # Print out the user + counter.increment() This would print a metric like this: @@ -154,8 +154,8 @@ class Timer(): # pylint: disable=too-few-public-methods context exits with an Exception or "success" if it exits cleanly. You can override this by setting timer.status within the context. - >>> with singer.metrics.Timer('request_duration', {'endpoint': 'users'}): - >>> # Do some stuff + with singer.metrics.Timer('request_duration', {'endpoint': 'users'}): + # Do some stuff This produces a metric like this: @@ -196,10 +196,10 @@ def __exit__(self, exc_type, exc_value, traceback): def record_counter(endpoint=None, log_interval=DEFAULT_LOG_INTERVAL): '''Use for counting records retrieved from the source. - >>> with singer.metrics.record_counter(endpoint="users") as counter: - >>> for record in my_records: - >>> # Do something with the record - >>> counter.increment() + with singer.metrics.record_counter(endpoint="users") as counter: + for record in my_records: + # Do something with the record + counter.increment() ''' tags = {} if endpoint: @@ -210,8 +210,8 @@ def record_counter(endpoint=None, log_interval=DEFAULT_LOG_INTERVAL): def http_request_timer(endpoint): '''Use for timing HTTP requests to an endpoint - >>> with singer.metrics.http_request_timer("users") as timer: - >>> # Make a request + with singer.metrics.http_request_timer("users") as timer: + # Make a request ''' tags = {} if endpoint: @@ -222,8 +222,8 @@ def http_request_timer(endpoint): def job_timer(job_type=None): '''Use for timing asynchronous jobs - >>> with singer.metrics.job_timer(job_type="users") as timer: - >>> # Make a request + with singer.metrics.job_timer(job_type="users") as timer: + # Make a request ''' tags = {} if job_type: diff --git a/singer/utils.py b/singer/utils.py index 81944fe..6f13cce 100644 --- a/singer/utils.py +++ b/singer/utils.py @@ -4,6 +4,8 @@ import functools import json import time +from warnings import warn + import dateutil.parser import pytz import backoff as backoff_module @@ -25,10 +27,35 @@ def strptime_with_tz(dtime): return d_object def strptime(dtime): - try: - return datetime.datetime.strptime(dtime, DATETIME_FMT) - except Exception: - return datetime.datetime.strptime(dtime, DATETIME_PARSE) + """DEPRECATED Use strptime_to_utc instead. + + Parse DTIME according to DATETIME_PARSE without TZ safety. + + >>> strptime("2018-01-01T00:00:00Z") + datetime.datetime(2018, 1, 1, 0, 0) + + Requires the Z TZ signifier + >>> strptime("2018-01-01T00:00:00") + Traceback (most recent call last): + ... + ValueError: time data '2018-01-01T00:00:00' does not match format '%Y-%m-%dT%H:%M:%SZ' + + Can't parse non-UTC DTs + >>> strptime("2018-01-01T00:00:00-04:00") + Traceback (most recent call last): + ... + ValueError: time data '2018-01-01T00:00:00-04:00' does not match format '%Y-%m-%dT%H:%M:%SZ' + + Does not support fractional seconds + >>> strptime("2018-01-01T00:00:00.000000Z") + Traceback (most recent call last): + ... + ValueError: time data '2018-01-01T00:00:00.000000Z' does not match format '%Y-%m-%dT%H:%M:%SZ' + """ + + warn("Use strptime_to_utc instead", DeprecationWarning, stacklevel=2) + + return datetime.datetime.strptime(dtime, DATETIME_PARSE) def strptime_to_utc(dtimestr): d_object = dateutil.parser.parse(dtimestr) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4ef53f0..bb26da6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,15 +1,22 @@ import unittest from datetime import datetime as dt -from datetime import timezone as tz +import pytz import logging import singer.utils as u class TestFormat(unittest.TestCase): def test_small_years(self): - self.assertEqual(u.strftime(dt(90, 1, 1, tzinfo=tz.utc)), + self.assertEqual(u.strftime(dt(90, 1, 1, tzinfo=pytz.UTC)), "0090-01-01T00:00:00.000000Z") + def test_round_trip(self): + now = dt.utcnow().replace(tzinfo=pytz.UTC) + dtime = u.strftime(now) + pdtime = u.strptime_to_utc(dtime) + fdtime = u.strftime(pdtime) + self.assertEqual(dtime, fdtime) + class TestHandleException(unittest.TestCase): def setUp(self):