Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pep8 #607

Merged
merged 6 commits into from
Dec 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/stylecheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Run a PEP 8 style check (pycodestyle) on every pull request.
on: [pull_request]
name: Python Style Check
jobs:
  pycodestyle:
    name: pycodestyle
    runs-on: ubuntu-latest
    steps:
      # Check out the PR's code so the style checker can scan it.
      - uses: actions/checkout@master
      # Run pycodestyle via the third-party action; it posts findings
      # back to the PR using the repository-scoped GITHUB_TOKEN.
      - name: pycodestyle
        uses: ankitvgupta/pycodestyle-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PRECOMMAND_MESSAGE: You have style errors. See them below.
18 changes: 9 additions & 9 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import datetime
import sys

# If extensions (or modules to document with autodoc) are in another directory,
Expand Down Expand Up @@ -61,7 +62,6 @@
master_doc = 'index'

# General information about the project.
import datetime
now = datetime.datetime.today()
project = 'tsfresh'
copyright = '2016-{}, Maximilian Christ et al./ Blue Yonder GmbH'.format(now.year)
Expand Down Expand Up @@ -205,21 +205,21 @@
# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
# 'papersize': 'letterpaper',
# The paper size ('letterpaper' or 'a4paper').
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
# 'pointsize': '10pt',
# The font size ('10pt', '11pt' or '12pt').
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
# 'preamble': '',
# Additional stuff for the LaTeX preamble.
# 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'user_guide.tex', 'tsfresh Documentation',
'', 'manual'),
('index', 'user_guide.tex', 'tsfresh Documentation',
'', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,6 @@ no-vcs = 1
formats = bdist_wheel
# do not upload the docs as we host them on read-the-docs
with-docs = 0

# Configuration for the pycodestyle (PEP 8) checker: the project allows
# lines up to 120 characters instead of the default 79.
[pycodestyle]
max-line-length = 120
Empty file.
Empty file.
10 changes: 6 additions & 4 deletions tests/integrations/examples/test_driftbif_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def test_relaxation_dynamics(self):
k3t = ds.kappa_3 * ds.tau
k3st = ds.kappa_3 ** 2 * ds.tau
a0 = v0 / ds.kappa_3
acceleration = lambda t: ds.kappa_3 * (a0 * np.sqrt(k3t - 1) * np.exp(k3st * t) /
np.sqrt(np.exp(2.0 * k3st * t) * ds.Q * a0 ** 2 +
np.exp(2.0 * ds.kappa_3 * t) * (k3t - 1 - ds.Q * a0 ** 2)))

def acceleration(t): return ds.kappa_3 * (a0 * np.sqrt(k3t - 1) * np.exp(k3st * t) /
np.sqrt(np.exp(2.0 * k3st * t) * ds.Q * a0 ** 2 +
np.exp(2.0 * ds.kappa_3 * t) * (k3t - 1 - ds.Q * a0 ** 2)))
t = ds.delta_t * np.arange(Nt)
return np.testing.assert_array_almost_equal(v[:, 0], np.vectorize(acceleration)(t),
decimal=8)
Expand All @@ -55,7 +56,8 @@ def test_dimensionality(self):
Nt = 10
v = ds.simulate(Nt)
self.assertEqual(v.shape, (Nt, 2),
'The default configuration should return velocities from a two-dimensional dissipative soliton.')
"The default configuration should return velocities "
"from a two-dimensional dissipative soliton.")

v = ds.simulate(Nt, v0=np.zeros(3))
self.assertEqual(v.shape, (Nt, 3),
Expand Down
3 changes: 2 additions & 1 deletion tests/integrations/examples/test_har_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from tsfresh.examples.har_dataset import download_har_dataset, load_har_dataset, load_har_classes
from pandas import DataFrame, Series


class HumanActivityTestCase(TestCase):
def setUp(self):
download_har_dataset()
Expand All @@ -20,4 +21,4 @@ def test_characteristics_downloaded_robot_execution_failures(self):
self.assertIsInstance(self.classes, Series)

def test_index(self):
self.assertCountEqual(self.data.index, self.classes.index)
self.assertCountEqual(self.data.index, self.classes.index)
2 changes: 1 addition & 1 deletion tests/integrations/test_full_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ def test_relevant_extraction(self):
'F_x__variance_larger_than_standard_deviation'}

self.assertGreaterEqual(set(extracted_features.columns), some_expected_features)
self.assertGreater(len(extracted_features), 0)
self.assertGreater(len(extracted_features), 0)
8 changes: 4 additions & 4 deletions tests/integrations/test_notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def _notebook_run(path, timeout=default_timeout):
try:
if os.environ['TRAVIS']:
return [], []
except:
except BaseException:
pass

# Ensure temporary files are not auto-deleted as processes have limited
# Ensure temporary files are not auto-deleted as processes have limited
# permissions to re-use file handles under WinNT-based operating systems.
fname = ''
with tempfile.NamedTemporaryFile(mode='w+t', suffix=".ipynb", delete=False) as fout:
fname = fout.name

args = ["jupyter", "nbconvert",
"--to", "notebook", "--execute", execproc_timeout]
args += ["--ExecutePreprocessor.kernel_name=python3"]
Expand All @@ -48,7 +48,7 @@ def _notebook_run(path, timeout=default_timeout):
os.remove(fname)

errors = [output for cell in nb.cells if "outputs" in cell
for output in cell["outputs"] \
for output in cell["outputs"]
if output.output_type == "error"]
return nb, errors

Expand Down
3 changes: 1 addition & 2 deletions tests/integrations/test_relevant_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas.util.testing as pdt
import pandas as pd


class RelevantFeatureExtractionDataTestCase(DataTestCase):
"""
Test case for the relevant_feature_extraction function
Expand Down Expand Up @@ -109,5 +110,3 @@ def test_raises_y_not_more_than_one_label(self):
df_dict = {"a": pd.DataFrame({"val": [1, 2, 3, 4, 10, 11], "id": [1, 1, 1, 1, 2, 2]}),
"b": pd.DataFrame({"val": [5, 6, 7, 8, 12, 13], "id": [4, 4, 3, 3, 2, 2]})}
self.assertRaises(AssertionError, extract_relevant_features, df_dict, y, None, None, None, "id", None, "val")


20 changes: 12 additions & 8 deletions tests/units/feature_extraction/test_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,17 @@ def test_simple_data_sample_four_timeseries(self):
df.sort_values(by=["id", "kind", "sort"], inplace=True)

result = generate_data_chunk_format(df, "id", "kind", "val")
expected = [(10, 'a', pd.Series([36, 71, 27, 62, 56, 58, 67, 11, 2, 24, 45, 30, 0, 9, 41, 28, 33, 19, 29, 43],
index=[10]*20, name="val")),
(10, 'b', pd.Series([78, 37, 23, 44, 6, 3, 21, 61, 39, 31, 53, 16, 66, 50, 40, 47, 7, 42, 38, 55],
index=[10] *20, name="val")),
(500, 'a', pd.Series([76, 72, 74, 75, 32, 64, 46, 35, 15, 70, 57, 65, 51, 26, 5, 25, 10, 69, 73, 77],
index=[500]*20, name="val")),
(500, 'b', pd.Series([8, 60, 12, 68, 22, 17, 18, 63, 49, 34, 20, 52, 48, 14, 79, 4, 1, 59, 54, 13],
index=[500] *20, name="val"))]
expected = [(10, 'a', pd.Series([36, 71, 27, 62, 56, 58, 67, 11, 2, 24, 45, 30, 0,
9, 41, 28, 33, 19, 29, 43],
index=[10] * 20, name="val")),
(10, 'b', pd.Series([78, 37, 23, 44, 6, 3, 21, 61, 39, 31, 53, 16, 66,
50, 40, 47, 7, 42, 38, 55],
index=[10] * 20, name="val")),
(500, 'a', pd.Series([76, 72, 74, 75, 32, 64, 46, 35, 15, 70, 57, 65,
51, 26, 5, 25, 10, 69, 73, 77],
index=[500] * 20, name="val")),
(500, 'b', pd.Series([8, 60, 12, 68, 22, 17, 18, 63, 49, 34, 20, 52,
48, 14, 79, 4, 1, 59, 54, 13],
index=[500] * 20, name="val"))]

self.assert_data_chunk_object_equal(result, expected)
65 changes: 35 additions & 30 deletions tests/units/feature_extraction/test_feature_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def assertFalseOnAllArrayTypes(self, f, input_to_f, *args, **kwargs):

def assertAllFalseOnAllArrayTypes(self, f, input_to_f, *args, **kwargs):
self.assertFalse(any(dict(f(input_to_f, *args, **kwargs)).values()), msg="Not false for lists")
self.assertFalse(any(dict(f(np.array(input_to_f), *args, **kwargs)).values()), msg="Not false for numpy.arrays")
self.assertFalse(any(dict(f(pd.Series(input_to_f), *args, **kwargs)).values()), msg="Not false for pandas.Series")
self.assertFalse(any(dict(f(np.array(input_to_f), *args, **kwargs)).values()),
msg="Not false for numpy.arrays")
self.assertFalse(any(dict(f(pd.Series(input_to_f), *args, **kwargs)).values()),
msg="Not false for pandas.Series")

def assertAlmostEqualOnAllArrayTypes(self, f, input_t_f, result, *args, **kwargs):
self.assertAlmostEqual(f(input_t_f, *args, **kwargs), result,
Expand All @@ -71,7 +73,7 @@ def assertEqualPandasSeriesWrapper(self, f, input_to_f, result, *args, **kwargs)
def test__roll(self):
x = np.random.normal(size=30)
for shift in [0, 1, 10, 11, 30, 31, 50, 51, 150, 151]:
np.testing.assert_array_equal(_roll(x, shift), np.roll(x, shift))
np.testing.assert_array_equal(_roll(x, shift), np.roll(x, shift))
np.testing.assert_array_equal(_roll(x, -shift), np.roll(x, -shift))

def test___get_length_sequences_where(self):
Expand Down Expand Up @@ -99,7 +101,7 @@ def test_large_standard_deviation(self):

def test_symmetry_looking(self):
self.assertAllTrueOnAllArrayTypes(symmetry_looking, [-1, -1, 1, 1],
[dict(r=0.05), dict(r=0.75)])
[dict(r=0.05), dict(r=0.75)])
self.assertAllFalseOnAllArrayTypes(symmetry_looking, [-1, -1, 1, 1], [dict(r=0)])
self.assertAllFalseOnAllArrayTypes(symmetry_looking, [-1, -1, -1, -1, 1], [dict(r=0.05)])
self.assertAllTrueOnAllArrayTypes(symmetry_looking, [-2, -2, -2, -1, -1, -1], [dict(r=0.05)])
Expand Down Expand Up @@ -240,7 +242,6 @@ def test_partial_autocorrelation(self):
else:
self.assertIsNaN(lag_val)


def test_augmented_dickey_fuller(self):
# todo: add unit test for the values of the test statistic

Expand All @@ -266,7 +267,7 @@ def test_augmented_dickey_fuller(self):
x = [0] * m
x[0] = 100
for i in range(1, m):
x[i] = x[i-1] * 0.5 + e[i]
x[i] = x[i - 1] * 0.5 + e[i]
param = [{"attr": "teststat"}, {"attr": "pvalue"}, {"attr": "usedlag"}]
expected_index = ['attr_"teststat"', 'attr_"pvalue"', 'attr_"usedlag"']

Expand Down Expand Up @@ -302,10 +303,10 @@ def test_cid_ce(self):

def test_ratio_beyond_r_sigma(self):

x = [0, 1]*10 + [10, 20, -30] # std of x is 7.21, mean 3.04
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 3./len(x), r=1)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 2./len(x), r=2)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 1./len(x), r=3)
x = [0, 1] * 10 + [10, 20, -30] # std of x is 7.21, mean 3.04
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 3. / len(x), r=1)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 2. / len(x), r=2)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 1. / len(x), r=3)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 0, r=20)

def test_mean_abs_change(self):
Expand Down Expand Up @@ -472,7 +473,7 @@ def test_fft_coefficient(self):
param = [{"coeff": 0, "attr": "real"}, {"coeff": 1, "attr": "real"}, {"coeff": 2, "attr": "real"},
{"coeff": 0, "attr": "imag"}, {"coeff": 1, "attr": "imag"}, {"coeff": 2, "attr": "imag"},
{"coeff": 0, "attr": "angle"}, {"coeff": 1, "attr": "angle"}, {"coeff": 2, "attr": "angle"},
{"coeff": 0, "attr": "abs"}, {"coeff": 1, "attr": "abs"}, {"coeff": 2, "attr": "abs"} ]
{"coeff": 0, "attr": "abs"}, {"coeff": 1, "attr": "abs"}, {"coeff": 2, "attr": "abs"}]
expected_index = ['coeff_0__attr_"real"', 'coeff_1__attr_"real"', 'coeff_2__attr_"real"',
'coeff_0__attr_"imag"', 'coeff_1__attr_"imag"', 'coeff_2__attr_"imag"',
'coeff_0__attr_"angle"', 'coeff_1__attr_"angle"', 'coeff_2__attr_"angle"',
Expand Down Expand Up @@ -524,7 +525,7 @@ def test_fft_aggregated(self):
self.assertAlmostEqual(res['aggtype_"kurtosis"'], 3.643, places=3)

# Scalar multiplying the distribution should not change the results:
x = 10*x
x = 10 * x
res = pd.Series(dict(fft_aggregated(x, param)))
self.assertCountEqual(list(res.index), expected_index)
self.assertAlmostEqual(res['aggtype_"centroid"'], 1.135, places=3)
Expand All @@ -545,15 +546,17 @@ def test_fft_aggregated(self):

# Gaussian test:
def normal(y, mean_, sigma_):
return 1/(2 * np.pi * sigma_ ** 2) * np.exp(-(y - mean_) ** 2 / (2 * sigma_ ** 2))
mean_ = 500.; sigma_ = 1.; range_ = int(2*mean_)
return 1 / (2 * np.pi * sigma_ ** 2) * np.exp(-(y - mean_) ** 2 / (2 * sigma_ ** 2))
mean_ = 500.
sigma_ = 1.
range_ = int(2 * mean_)
x = list(map(lambda x: normal(x, mean_, sigma_), range(range_)))

# The fourier transform of a Normal dist in the positive halfspace is a half normal,
# Hand calculated values of centroid and variance based for the half-normal dist:
# (Ref: https://en.wikipedia.org/wiki/Half-normal_distribution)
expected_fft_centroid = (range_/(2*np.pi*sigma_))*np.sqrt(2/np.pi)
expected_fft_var = (range_/(2*np.pi*sigma_))**2*(1-2/np.pi)
expected_fft_centroid = (range_ / (2 * np.pi * sigma_)) * np.sqrt(2 / np.pi)
expected_fft_var = (range_ / (2 * np.pi * sigma_))**2 * (1 - 2 / np.pi)

# Calculate values for unit test:
res = pd.Series(dict(fft_aggregated(x, param)))
Expand All @@ -563,11 +566,11 @@ def normal(y, mean_, sigma_):
rel_diff_allowed = 0.02
self.assertAlmostEqual(
res['aggtype_"centroid"'], expected_fft_centroid,
delta=rel_diff_allowed*expected_fft_centroid
delta=rel_diff_allowed * expected_fft_centroid
)
self.assertAlmostEqual(
res['aggtype_"variance"'], expected_fft_var,
delta=rel_diff_allowed*expected_fft_var
delta=rel_diff_allowed * expected_fft_var
)

def test_number_peaks(self):
Expand Down Expand Up @@ -886,7 +889,6 @@ def test_linear_trend(self):
param = [{"attr": "pvalue"}, {"attr": "rvalue"}, {"attr": "intercept"}, {"attr": "slope"}, {"attr": "stderr"}]
res = linear_trend(x, param)


res = pd.Series(dict(res))

expected_index = ["attr_\"pvalue\"", "attr_\"intercept\"",
Expand Down Expand Up @@ -922,14 +924,17 @@ def test_linear_trend(self):

def test__aggregate_on_chunks(self):
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="max", chunk_len=2), [1, 3])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([1, 1, 3, 3]), f_agg="max", chunk_len=2), [1, 3])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([1, 1, 3, 3]), f_agg="max", chunk_len=2), [1, 3])

self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="min", chunk_len=2), [0, 2])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3, 5]), f_agg="min", chunk_len=2), [0, 2, 5])

self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="mean", chunk_len=2), [0.5, 2.5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=2), [0.5, 2, 5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=3), [1/3, 4.5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="mean", chunk_len=2),
[0.5, 2.5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=2),
[0.5, 2, 5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=3),
[1 / 3, 4.5])

self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3, 5, -2]),
f_agg="median", chunk_len=2), [0.5, 2.5, 1.5])
Expand Down Expand Up @@ -1090,7 +1095,7 @@ def test_linear_trend_timewise_seconds(self):
"""Test linear_trend_timewise function with second intervals."""
# Try with different days
x = pd.Series(
[0, 1/float(3600), 2/float(3600), 3/float(3600)],
[0, 1 / float(3600), 2 / float(3600), 3 / float(3600)],
index=pd.DatetimeIndex([
'2018-01-01 04:00:01', '2018-01-01 04:00:02',
'2018-01-01 04:00:03', '2018-01-01 04:00:04'
Expand All @@ -1111,7 +1116,7 @@ def test_linear_trend_timewise_years(self):
"""Test linear_trend_timewise function with year intervals."""
# Try with different days
x = pd.Series(
[0, 365*24, 365*48, 365*72+24], # Add 24 to the last one since it's a leap year
[0, 365 * 24, 365 * 48, 365 * 72 + 24], # Add 24 to the last one since it's a leap year
index=pd.DatetimeIndex([
'2018-01-01 04:00:00', '2019-01-01 04:00:00',
'2020-01-01 04:00:00', '2021-01-01 04:00:00'
Expand All @@ -1134,7 +1139,7 @@ def test_change_quantiles(self):
res = change_quantiles(np.random.rand(10000) * 1000, 0.1, 0.2, False, 'mean')
self.assertAlmostEqual(res, -0.9443846621365727)


class FriedrichTestCase(TestCase):

def test_estimate_friedrich_coefficients(self):
Expand Down Expand Up @@ -1177,10 +1182,10 @@ def test_friedrich_number_of_returned_features_is_equal_to_number_of_parameters(

def test_friedrich_equal_to_snapshot(self):
param = [{"coeff": coeff, "m": 2, "r": 30} for coeff in range(4)]
x = np.array([-0.53, -0.61, -1.26, -0.88, -0.34, 0.58, 2.86, -0.47, 0.78,
-0.45, -0.27, 0.43, 1.72, 0.26, 1.02, -0.09, 0.65, 1.49,
-0.95, -1.02, -0.64, -1.63, -0.71, -0.43, -1.69, 0.05, 1.58,
1.1, 0.55, -1.02])
x = np.array([-0.53, -0.61, -1.26, -0.88, -0.34, 0.58, 2.86, -0.47, 0.78,
-0.45, -0.27, 0.43, 1.72, 0.26, 1.02, -0.09, 0.65, 1.49,
-0.95, -1.02, -0.64, -1.63, -0.71, -0.43, -1.69, 0.05, 1.58,
1.1, 0.55, -1.02])

res = pd.Series(dict(friedrich_coefficients(x, param)))

Expand Down
Loading