Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pep8 #607

Merged
merged 6 commits into from
Dec 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/stylecheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Run a PEP 8 style check (pycodestyle) on every pull request.
on: [pull_request]
name: Python Style Check
jobs:
  pycodestyle:
    name: pycodestyle
    runs-on: ubuntu-latest
    steps:
      # Check out the PR's code so the style checker can scan it.
      - uses: actions/checkout@master
      # Run pycodestyle via the third-party action; it posts findings
      # back to the PR using the repository-scoped GITHUB_TOKEN.
      - name: pycodestyle
        uses: ankitvgupta/pycodestyle-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PRECOMMAND_MESSAGE: You have style errors. See them below.
18 changes: 9 additions & 9 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# All configuration values have a default; values that are commented out
# serve to show the default.

import datetime
import sys

# If extensions (or modules to document with autodoc) are in another directory,
Expand Down Expand Up @@ -61,7 +62,6 @@
master_doc = 'index'

# General information about the project.
import datetime
now = datetime.datetime.today()
project = 'tsfresh'
copyright = '2016-{}, Maximilian Christ et al./ Blue Yonder GmbH'.format(now.year)
Expand Down Expand Up @@ -205,21 +205,21 @@
# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
# 'papersize': 'letterpaper',
# The paper size ('letterpaper' or 'a4paper').
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
# 'pointsize': '10pt',
# The font size ('10pt', '11pt' or '12pt').
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
# 'preamble': '',
# Additional stuff for the LaTeX preamble.
# 'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
('index', 'user_guide.tex', 'tsfresh Documentation',
'', 'manual'),
('index', 'user_guide.tex', 'tsfresh Documentation',
'', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
Expand Down
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,6 @@ no-vcs = 1
formats = bdist_wheel
# do not upload the docs as we host them on read-the-docs
with-docs = 0

# Configuration for the pycodestyle (PEP 8) checker: the project allows
# lines up to 120 characters instead of the default 79.
[pycodestyle]
max-line-length = 120
Empty file.
Empty file.
10 changes: 6 additions & 4 deletions tests/integrations/examples/test_driftbif_simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ def test_relaxation_dynamics(self):
k3t = ds.kappa_3 * ds.tau
k3st = ds.kappa_3 ** 2 * ds.tau
a0 = v0 / ds.kappa_3
acceleration = lambda t: ds.kappa_3 * (a0 * np.sqrt(k3t - 1) * np.exp(k3st * t) /
np.sqrt(np.exp(2.0 * k3st * t) * ds.Q * a0 ** 2 +
np.exp(2.0 * ds.kappa_3 * t) * (k3t - 1 - ds.Q * a0 ** 2)))

def acceleration(t): return ds.kappa_3 * (a0 * np.sqrt(k3t - 1) * np.exp(k3st * t) /
np.sqrt(np.exp(2.0 * k3st * t) * ds.Q * a0 ** 2 +
np.exp(2.0 * ds.kappa_3 * t) * (k3t - 1 - ds.Q * a0 ** 2)))
t = ds.delta_t * np.arange(Nt)
return np.testing.assert_array_almost_equal(v[:, 0], np.vectorize(acceleration)(t),
decimal=8)
Expand All @@ -55,7 +56,8 @@ def test_dimensionality(self):
Nt = 10
v = ds.simulate(Nt)
self.assertEqual(v.shape, (Nt, 2),
'The default configuration should return velocities from a two-dimensional dissipative soliton.')
"The default configuration should return velocities "
"from a two-dimensional dissipative soliton.")

v = ds.simulate(Nt, v0=np.zeros(3))
self.assertEqual(v.shape, (Nt, 3),
Expand Down
3 changes: 2 additions & 1 deletion tests/integrations/examples/test_har_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from tsfresh.examples.har_dataset import download_har_dataset, load_har_dataset, load_har_classes
from pandas import DataFrame, Series


class HumanActivityTestCase(TestCase):
def setUp(self):
download_har_dataset()
Expand All @@ -20,4 +21,4 @@ def test_characteristics_downloaded_robot_execution_failures(self):
self.assertIsInstance(self.classes, Series)

def test_index(self):
self.assertCountEqual(self.data.index, self.classes.index)
self.assertCountEqual(self.data.index, self.classes.index)
2 changes: 1 addition & 1 deletion tests/integrations/test_full_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ def test_relevant_extraction(self):
'F_x__variance_larger_than_standard_deviation'}

self.assertGreaterEqual(set(extracted_features.columns), some_expected_features)
self.assertGreater(len(extracted_features), 0)
self.assertGreater(len(extracted_features), 0)
8 changes: 4 additions & 4 deletions tests/integrations/test_notebooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def _notebook_run(path, timeout=default_timeout):
try:
if os.environ['TRAVIS']:
return [], []
except:
except BaseException:
pass

# Ensure temporary files are not auto-deleted as processes have limited
# Ensure temporary files are not auto-deleted as processes have limited
# permissions to re-use file handles under WinNT-based operating systems.
fname = ''
with tempfile.NamedTemporaryFile(mode='w+t', suffix=".ipynb", delete=False) as fout:
fname = fout.name

args = ["jupyter", "nbconvert",
"--to", "notebook", "--execute", execproc_timeout]
args += ["--ExecutePreprocessor.kernel_name=python3"]
Expand All @@ -48,7 +48,7 @@ def _notebook_run(path, timeout=default_timeout):
os.remove(fname)

errors = [output for cell in nb.cells if "outputs" in cell
for output in cell["outputs"] \
for output in cell["outputs"]
if output.output_type == "error"]
return nb, errors

Expand Down
3 changes: 1 addition & 2 deletions tests/integrations/test_relevant_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas.util.testing as pdt
import pandas as pd


class RelevantFeatureExtractionDataTestCase(DataTestCase):
"""
Test case for the relevant_feature_extraction function
Expand Down Expand Up @@ -109,5 +110,3 @@ def test_raises_y_not_more_than_one_label(self):
df_dict = {"a": pd.DataFrame({"val": [1, 2, 3, 4, 10, 11], "id": [1, 1, 1, 1, 2, 2]}),
"b": pd.DataFrame({"val": [5, 6, 7, 8, 12, 13], "id": [4, 4, 3, 3, 2, 2]})}
self.assertRaises(AssertionError, extract_relevant_features, df_dict, y, None, None, None, "id", None, "val")


20 changes: 12 additions & 8 deletions tests/units/feature_extraction/test_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,17 @@ def test_simple_data_sample_four_timeseries(self):
df.sort_values(by=["id", "kind", "sort"], inplace=True)

result = generate_data_chunk_format(df, "id", "kind", "val")
expected = [(10, 'a', pd.Series([36, 71, 27, 62, 56, 58, 67, 11, 2, 24, 45, 30, 0, 9, 41, 28, 33, 19, 29, 43],
index=[10]*20, name="val")),
(10, 'b', pd.Series([78, 37, 23, 44, 6, 3, 21, 61, 39, 31, 53, 16, 66, 50, 40, 47, 7, 42, 38, 55],
index=[10] *20, name="val")),
(500, 'a', pd.Series([76, 72, 74, 75, 32, 64, 46, 35, 15, 70, 57, 65, 51, 26, 5, 25, 10, 69, 73, 77],
index=[500]*20, name="val")),
(500, 'b', pd.Series([8, 60, 12, 68, 22, 17, 18, 63, 49, 34, 20, 52, 48, 14, 79, 4, 1, 59, 54, 13],
index=[500] *20, name="val"))]
expected = [(10, 'a', pd.Series([36, 71, 27, 62, 56, 58, 67, 11, 2, 24, 45, 30, 0,
9, 41, 28, 33, 19, 29, 43],
index=[10] * 20, name="val")),
(10, 'b', pd.Series([78, 37, 23, 44, 6, 3, 21, 61, 39, 31, 53, 16, 66,
50, 40, 47, 7, 42, 38, 55],
index=[10] * 20, name="val")),
(500, 'a', pd.Series([76, 72, 74, 75, 32, 64, 46, 35, 15, 70, 57, 65,
51, 26, 5, 25, 10, 69, 73, 77],
index=[500] * 20, name="val")),
(500, 'b', pd.Series([8, 60, 12, 68, 22, 17, 18, 63, 49, 34, 20, 52,
48, 14, 79, 4, 1, 59, 54, 13],
index=[500] * 20, name="val"))]

self.assert_data_chunk_object_equal(result, expected)
65 changes: 35 additions & 30 deletions tests/units/feature_extraction/test_feature_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def assertFalseOnAllArrayTypes(self, f, input_to_f, *args, **kwargs):

def assertAllFalseOnAllArrayTypes(self, f, input_to_f, *args, **kwargs):
self.assertFalse(any(dict(f(input_to_f, *args, **kwargs)).values()), msg="Not false for lists")
self.assertFalse(any(dict(f(np.array(input_to_f), *args, **kwargs)).values()), msg="Not false for numpy.arrays")
self.assertFalse(any(dict(f(pd.Series(input_to_f), *args, **kwargs)).values()), msg="Not false for pandas.Series")
self.assertFalse(any(dict(f(np.array(input_to_f), *args, **kwargs)).values()),
msg="Not false for numpy.arrays")
self.assertFalse(any(dict(f(pd.Series(input_to_f), *args, **kwargs)).values()),
msg="Not false for pandas.Series")

def assertAlmostEqualOnAllArrayTypes(self, f, input_t_f, result, *args, **kwargs):
self.assertAlmostEqual(f(input_t_f, *args, **kwargs), result,
Expand All @@ -71,7 +73,7 @@ def assertEqualPandasSeriesWrapper(self, f, input_to_f, result, *args, **kwargs)
def test__roll(self):
x = np.random.normal(size=30)
for shift in [0, 1, 10, 11, 30, 31, 50, 51, 150, 151]:
np.testing.assert_array_equal(_roll(x, shift), np.roll(x, shift))
np.testing.assert_array_equal(_roll(x, shift), np.roll(x, shift))
np.testing.assert_array_equal(_roll(x, -shift), np.roll(x, -shift))

def test___get_length_sequences_where(self):
Expand Down Expand Up @@ -99,7 +101,7 @@ def test_large_standard_deviation(self):

def test_symmetry_looking(self):
self.assertAllTrueOnAllArrayTypes(symmetry_looking, [-1, -1, 1, 1],
[dict(r=0.05), dict(r=0.75)])
[dict(r=0.05), dict(r=0.75)])
self.assertAllFalseOnAllArrayTypes(symmetry_looking, [-1, -1, 1, 1], [dict(r=0)])
self.assertAllFalseOnAllArrayTypes(symmetry_looking, [-1, -1, -1, -1, 1], [dict(r=0.05)])
self.assertAllTrueOnAllArrayTypes(symmetry_looking, [-2, -2, -2, -1, -1, -1], [dict(r=0.05)])
Expand Down Expand Up @@ -240,7 +242,6 @@ def test_partial_autocorrelation(self):
else:
self.assertIsNaN(lag_val)


def test_augmented_dickey_fuller(self):
# todo: add unit test for the values of the test statistic

Expand All @@ -266,7 +267,7 @@ def test_augmented_dickey_fuller(self):
x = [0] * m
x[0] = 100
for i in range(1, m):
x[i] = x[i-1] * 0.5 + e[i]
x[i] = x[i - 1] * 0.5 + e[i]
param = [{"attr": "teststat"}, {"attr": "pvalue"}, {"attr": "usedlag"}]
expected_index = ['attr_"teststat"', 'attr_"pvalue"', 'attr_"usedlag"']

Expand Down Expand Up @@ -302,10 +303,10 @@ def test_cid_ce(self):

def test_ratio_beyond_r_sigma(self):

x = [0, 1]*10 + [10, 20, -30] # std of x is 7.21, mean 3.04
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 3./len(x), r=1)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 2./len(x), r=2)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 1./len(x), r=3)
x = [0, 1] * 10 + [10, 20, -30] # std of x is 7.21, mean 3.04
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 3. / len(x), r=1)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 2. / len(x), r=2)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 1. / len(x), r=3)
self.assertEqualOnAllArrayTypes(ratio_beyond_r_sigma, x, 0, r=20)

def test_mean_abs_change(self):
Expand Down Expand Up @@ -472,7 +473,7 @@ def test_fft_coefficient(self):
param = [{"coeff": 0, "attr": "real"}, {"coeff": 1, "attr": "real"}, {"coeff": 2, "attr": "real"},
{"coeff": 0, "attr": "imag"}, {"coeff": 1, "attr": "imag"}, {"coeff": 2, "attr": "imag"},
{"coeff": 0, "attr": "angle"}, {"coeff": 1, "attr": "angle"}, {"coeff": 2, "attr": "angle"},
{"coeff": 0, "attr": "abs"}, {"coeff": 1, "attr": "abs"}, {"coeff": 2, "attr": "abs"} ]
{"coeff": 0, "attr": "abs"}, {"coeff": 1, "attr": "abs"}, {"coeff": 2, "attr": "abs"}]
expected_index = ['coeff_0__attr_"real"', 'coeff_1__attr_"real"', 'coeff_2__attr_"real"',
'coeff_0__attr_"imag"', 'coeff_1__attr_"imag"', 'coeff_2__attr_"imag"',
'coeff_0__attr_"angle"', 'coeff_1__attr_"angle"', 'coeff_2__attr_"angle"',
Expand Down Expand Up @@ -524,7 +525,7 @@ def test_fft_aggregated(self):
self.assertAlmostEqual(res['aggtype_"kurtosis"'], 3.643, places=3)

# Scalar multiplying the distribution should not change the results:
x = 10*x
x = 10 * x
res = pd.Series(dict(fft_aggregated(x, param)))
self.assertCountEqual(list(res.index), expected_index)
self.assertAlmostEqual(res['aggtype_"centroid"'], 1.135, places=3)
Expand All @@ -545,15 +546,17 @@ def test_fft_aggregated(self):

# Gaussian test:
def normal(y, mean_, sigma_):
return 1/(2 * np.pi * sigma_ ** 2) * np.exp(-(y - mean_) ** 2 / (2 * sigma_ ** 2))
mean_ = 500.; sigma_ = 1.; range_ = int(2*mean_)
return 1 / (2 * np.pi * sigma_ ** 2) * np.exp(-(y - mean_) ** 2 / (2 * sigma_ ** 2))
mean_ = 500.
sigma_ = 1.
range_ = int(2 * mean_)
x = list(map(lambda x: normal(x, mean_, sigma_), range(range_)))

# The fourier transform of a Normal dist in the positive halfspace is a half normal,
# Hand calculated values of centroid and variance based for the half-normal dist:
# (Ref: https://en.wikipedia.org/wiki/Half-normal_distribution)
expected_fft_centroid = (range_/(2*np.pi*sigma_))*np.sqrt(2/np.pi)
expected_fft_var = (range_/(2*np.pi*sigma_))**2*(1-2/np.pi)
expected_fft_centroid = (range_ / (2 * np.pi * sigma_)) * np.sqrt(2 / np.pi)
expected_fft_var = (range_ / (2 * np.pi * sigma_))**2 * (1 - 2 / np.pi)

# Calculate values for unit test:
res = pd.Series(dict(fft_aggregated(x, param)))
Expand All @@ -563,11 +566,11 @@ def normal(y, mean_, sigma_):
rel_diff_allowed = 0.02
self.assertAlmostEqual(
res['aggtype_"centroid"'], expected_fft_centroid,
delta=rel_diff_allowed*expected_fft_centroid
delta=rel_diff_allowed * expected_fft_centroid
)
self.assertAlmostEqual(
res['aggtype_"variance"'], expected_fft_var,
delta=rel_diff_allowed*expected_fft_var
delta=rel_diff_allowed * expected_fft_var
)

def test_number_peaks(self):
Expand Down Expand Up @@ -886,7 +889,6 @@ def test_linear_trend(self):
param = [{"attr": "pvalue"}, {"attr": "rvalue"}, {"attr": "intercept"}, {"attr": "slope"}, {"attr": "stderr"}]
res = linear_trend(x, param)


res = pd.Series(dict(res))

expected_index = ["attr_\"pvalue\"", "attr_\"intercept\"",
Expand Down Expand Up @@ -922,14 +924,17 @@ def test_linear_trend(self):

def test__aggregate_on_chunks(self):
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="max", chunk_len=2), [1, 3])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([1, 1, 3, 3]), f_agg="max", chunk_len=2), [1, 3])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([1, 1, 3, 3]), f_agg="max", chunk_len=2), [1, 3])

self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="min", chunk_len=2), [0, 2])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3, 5]), f_agg="min", chunk_len=2), [0, 2, 5])

self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="mean", chunk_len=2), [0.5, 2.5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=2), [0.5, 2, 5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=3), [1/3, 4.5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3]), f_agg="mean", chunk_len=2),
[0.5, 2.5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=2),
[0.5, 2, 5])
self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 0, 4, 5]), f_agg="mean", chunk_len=3),
[1 / 3, 4.5])

self.assertListEqual(_aggregate_on_chunks(x=pd.Series([0, 1, 2, 3, 5, -2]),
f_agg="median", chunk_len=2), [0.5, 2.5, 1.5])
Expand Down Expand Up @@ -1090,7 +1095,7 @@ def test_linear_trend_timewise_seconds(self):
"""Test linear_trend_timewise function with second intervals."""
# Try with different days
x = pd.Series(
[0, 1/float(3600), 2/float(3600), 3/float(3600)],
[0, 1 / float(3600), 2 / float(3600), 3 / float(3600)],
index=pd.DatetimeIndex([
'2018-01-01 04:00:01', '2018-01-01 04:00:02',
'2018-01-01 04:00:03', '2018-01-01 04:00:04'
Expand All @@ -1111,7 +1116,7 @@ def test_linear_trend_timewise_years(self):
"""Test linear_trend_timewise function with year intervals."""
# Try with different days
x = pd.Series(
[0, 365*24, 365*48, 365*72+24], # Add 24 to the last one since it's a leap year
[0, 365 * 24, 365 * 48, 365 * 72 + 24], # Add 24 to the last one since it's a leap year
index=pd.DatetimeIndex([
'2018-01-01 04:00:00', '2019-01-01 04:00:00',
'2020-01-01 04:00:00', '2021-01-01 04:00:00'
Expand All @@ -1134,7 +1139,7 @@ def test_change_quantiles(self):
res = change_quantiles(np.random.rand(10000) * 1000, 0.1, 0.2, False, 'mean')
self.assertAlmostEqual(res, -0.9443846621365727)


class FriedrichTestCase(TestCase):

def test_estimate_friedrich_coefficients(self):
Expand Down Expand Up @@ -1177,10 +1182,10 @@ def test_friedrich_number_of_returned_features_is_equal_to_number_of_parameters(

def test_friedrich_equal_to_snapshot(self):
param = [{"coeff": coeff, "m": 2, "r": 30} for coeff in range(4)]
x = np.array([-0.53, -0.61, -1.26, -0.88, -0.34, 0.58, 2.86, -0.47, 0.78,
-0.45, -0.27, 0.43, 1.72, 0.26, 1.02, -0.09, 0.65, 1.49,
-0.95, -1.02, -0.64, -1.63, -0.71, -0.43, -1.69, 0.05, 1.58,
1.1, 0.55, -1.02])
x = np.array([-0.53, -0.61, -1.26, -0.88, -0.34, 0.58, 2.86, -0.47, 0.78,
-0.45, -0.27, 0.43, 1.72, 0.26, 1.02, -0.09, 0.65, 1.49,
-0.95, -1.02, -0.64, -1.63, -0.71, -0.43, -1.69, 0.05, 1.58,
1.1, 0.55, -1.02])

res = pd.Series(dict(friedrich_coefficients(x, param)))

Expand Down
Loading