Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Features: absolute_maximum; mean_n_absolute_max | Add install_requirements.sh script #833

Merged
merged 21 commits into from
Apr 1, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
5 changes: 5 additions & 0 deletions install_requirements.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Install the dependencies listed in the project's requirements files and
# then install the package itself in editable mode.

# Abort on the first failing command (and on unset variables / failed pipes)
# so that a broken install step is not masked by the steps that follow it.
set -euo pipefail

# The requirement files and "." are relative paths: resolve them against the
# directory containing this script rather than the caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

pip install -r requirements.txt
pip install -r rdocs-requirements.txt
pip install -r test-requirements.txt
pip install -e .
32 changes: 16 additions & 16 deletions tests/integrations/test_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,28 @@ def test_pandas(self):
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 18))
self.assertEqual(X.shape, (100, 20))

X = extract_features(df, column_id="my_id", column_sort="time", column_kind="dimension",
default_fc_parameters=MinimalFCParameters())
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 18))
self.assertEqual(X.shape, (100, 20))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id", column_sort="time",
default_fc_parameters=MinimalFCParameters())
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 9))
self.assertEqual(X.shape, (100, 10))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
default_fc_parameters=MinimalFCParameters())
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 9))
self.assertEqual(X.shape, (100, 10))

def test_pandas_no_pivot(self):
df = self.df
Expand All @@ -62,7 +62,7 @@ def test_pandas_no_pivot(self):
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*18, 3))
self.assertEqual(X.shape, (100*20, 3))

X = extract_features(df, column_id="my_id", column_sort="time",
column_kind="dimension",
Expand All @@ -71,7 +71,7 @@ def test_pandas_no_pivot(self):
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*18, 3))
self.assertEqual(X.shape, (100*20, 3))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id",
column_sort="time",
Expand All @@ -80,15 +80,15 @@ def test_pandas_no_pivot(self):
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*9, 3))
self.assertEqual(X.shape, (100*10, 3))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
pivot=False,
default_fc_parameters=MinimalFCParameters())
X = pd.DataFrame(X, columns=["my_id", "variable", "value"])
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*9, 3))
self.assertEqual(X.shape, (100*10, 3))

def test_dask(self):
df = dd.from_pandas(self.df, npartitions=1)
Expand All @@ -99,30 +99,30 @@ def test_dask(self):
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 18))
self.assertEqual(X.shape, (100, 20))

X = extract_features(df, column_id="my_id", column_sort="time",
column_kind="dimension",
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("1__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "1__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 18))
self.assertEqual(X.shape, (100, 20))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id",
column_sort="time",
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 9))
self.assertEqual(X.shape, (100, 10))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X.columns)
self.assertAlmostEqual(X.loc["5", "value__mean"], 5.516e-05, 4)
self.assertIn("11", X.index)
self.assertEqual(X.shape, (100, 9))
self.assertEqual(X.shape, (100, 10))

def test_dask_no_pivot(self):
df = dd.from_pandas(self.df, npartitions=1)
Expand All @@ -133,27 +133,27 @@ def test_dask_no_pivot(self):
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*18, 3))
self.assertEqual(X.shape, (100*20, 3))

X = extract_features(df, column_id="my_id", column_sort="time",
column_kind="dimension",
pivot=False,
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("1__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "1__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*18, 3))
self.assertEqual(X.shape, (100*20, 3))

X = extract_features(df.drop(columns=["dimension"]), column_id="my_id",
column_sort="time",
pivot=False,
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*9, 3))
self.assertEqual(X.shape, (100*10, 3))

X = extract_features(df.drop(columns=["dimension", "time"]), column_id="my_id",
pivot=False,
default_fc_parameters=MinimalFCParameters()).compute()
self.assertIn("value__mean", X["variable"].values)
self.assertAlmostEqual(X[(X["my_id"] == "5") & (X["variable"] == "value__mean")]["value"].iloc[0], 5.516e-05, 4)
self.assertEqual(X.shape, (100*9, 3))
self.assertEqual(X.shape, (100*10, 3))
9 changes: 9 additions & 0 deletions tests/units/feature_extraction/test_feature_calculations.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,15 @@ def test_root_mean_square(self):
self.assertAlmostEqualOnAllArrayTypes(root_mean_square, [1], 1)
self.assertAlmostEqualOnAllArrayTypes(root_mean_square, [-1], 1)

def test_mean_n_absolute_max(self):
    # NaN is expected for an empty series and when more maxima are
    # requested than the series has values
    for series, n_max in (([], 1), ([12, 3], 10)):
        self.assertIsNanOnAllArrayTypes(mean_n_absolute_max, series, number_of_maxima=n_max)

    # a non-positive number of maxima trips the assertion in the calculator
    for invalid_n in (0, -1):
        self.assertRaises(AssertionError, mean_n_absolute_max, [12, 3], number_of_maxima=invalid_n)

    # regular inputs: (series, expected mean, number of maxima)
    regular_cases = (
        ([-1, -5, 4, 10], 6.33333333333, 3),
        ([0, -5, -9], 7.000000, 2),
        ([0, 0, 0], 0, 1),
    )
    for series, expected, n_max in regular_cases:
        self.assertAlmostEqualOnAllArrayTypes(mean_n_absolute_max, series, expected, number_of_maxima=n_max)

def test_absolute_sum_of_changes(self):
self.assertEqualOnAllArrayTypes(absolute_sum_of_changes, [1, 1, 1, 1, 2, 1], 2)
self.assertEqualOnAllArrayTypes(absolute_sum_of_changes, [1, -1, 1, -1], 6)
Expand Down
2 changes: 1 addition & 1 deletion tests/units/feature_extraction/test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,5 +202,5 @@ def test_extraction_runs_through(self):

self.assertCountEqual(extracted_features.columns, ["0__median", "0__standard_deviation", "0__sum_values",
"0__maximum", "0__variance", "0__minimum", "0__mean",
"0__length", "0__root_mean_square"])
"0__length", "0__root_mean_square", "0__absolut_maximum"])
self.assertCountEqual(extracted_features.index, [0, 1])
34 changes: 34 additions & 0 deletions tsfresh/feature_extraction/feature_calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1547,6 +1547,26 @@ def c3(x, lag):
return np.mean((_roll(x, 2 * -lag) * _roll(x, -lag) * x)[0:(n - 2 * lag)])


@set_property("fctype", "simple")
@set_property("minimal", True)
def mean_n_absolute_max(x, number_of_maxima):
    """
    Calculates the arithmetic mean of the n absolute maximum values of the time series.

    :param x: the time series to calculate the feature of
    :type x: numpy.ndarray
    :param number_of_maxima: the number of maxima, which should be considered
    :type number_of_maxima: int

    :return: the value of this feature, or NaN if the time series does not
        contain more than ``number_of_maxima`` values
    :return type: float
    :raises AssertionError: if ``number_of_maxima`` is not greater than 0
    """

    assert number_of_maxima > 0, f" number_of_maxima={number_of_maxima} which is not greater than 0"

    # The series has to be strictly longer than the number of requested maxima.
    # np.nan is used because the np.NaN alias no longer exists in NumPy 2.0.
    if len(x) <= number_of_maxima:
        return np.nan

    # An ascending sort puts the largest absolute values at the tail.
    return np.mean(np.sort(np.absolute(x))[-number_of_maxima:])
OliEfr marked this conversation as resolved.
Show resolved Hide resolved
OliEfr marked this conversation as resolved.
Show resolved Hide resolved


@set_property("fctype", "simple")
def binned_entropy(x, max_bins):
"""
Expand Down Expand Up @@ -1889,6 +1909,20 @@ def maximum(x):
return np.max(x)


@set_property("fctype", "simple")
@set_property("minimal", True)
def absolut_maximum(x):
    """
    Calculates the highest absolute value of the time series x.

    :param x: the time series to calculate the feature of
    :type x: numpy.ndarray
    :return: the value of this feature, or NaN if x is empty
    :return type: float
    """
    # np.max raises a ValueError on zero-size input, so an empty series
    # yields NaN instead of an exception.
    if len(x) == 0:
        return np.nan
    return np.max(np.absolute(x))
OliEfr marked this conversation as resolved.
Show resolved Hide resolved


@set_property("fctype", "simple")
@set_property("minimal", True)
def minimum(x):
Expand Down