From 6f9c7950d92ca84c9a4a44829d3aef283715a361 Mon Sep 17 00:00:00 2001 From: Emanuele Fumagalli Date: Wed, 1 Jul 2020 17:26:29 +0100 Subject: [PATCH] swapped function names for percentage of reoccuring (#725) * swapped function names for percentage of reoccuring * fix comments in reoccurring_values functions * working on reoccurring_values comments Co-authored-by: Emanuele Fumagalli --- .../test_feature_calculations.py | 8 ++++---- tsfresh/feature_extraction/feature_calculators.py | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/units/feature_extraction/test_feature_calculations.py b/tests/units/feature_extraction/test_feature_calculations.py index ce59b8196..f714be2d9 100644 --- a/tests/units/feature_extraction/test_feature_calculations.py +++ b/tests/units/feature_extraction/test_feature_calculations.py @@ -516,19 +516,19 @@ def test_first_location_of_minimum(self): def test_percentage_of_doubled_datapoints(self): self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_datapoints_to_all_datapoints, [1, 1, 2, 3, 4], - 0.25) + 0.4) self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_datapoints_to_all_datapoints, [1, 1.5, 2, 3], 0) self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_datapoints_to_all_datapoints, [1], 0) self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_datapoints_to_all_datapoints, - [1.111, -2.45, 1.111, 2.45], 1.0 / 3.0) + [1.111, -2.45, 1.111, 2.45], 0.5) self.assertIsNanOnAllArrayTypes(percentage_of_reoccurring_datapoints_to_all_datapoints, []) def test_ratio_of_doubled_values(self): - self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_values_to_all_values, [1, 1, 2, 3, 4], 0.4) + self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_values_to_all_values, [1, 1, 2, 3, 4], 0.25) self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_values_to_all_values, [1, 1.5, 2, 3], 0) 
self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_values_to_all_values, [1], 0) self.assertAlmostEqualOnAllArrayTypes(percentage_of_reoccurring_values_to_all_values, - [1.111, -2.45, 1.111, 2.45], 0.5) + [1.111, -2.45, 1.111, 2.45], 1.0 / 3.0) self.assertIsNanOnAllArrayTypes(percentage_of_reoccurring_values_to_all_values, []) def test_sum_of_reoccurring_values(self): diff --git a/tsfresh/feature_extraction/feature_calculators.py b/tsfresh/feature_extraction/feature_calculators.py index 1b64589f0..d31237ea9 100644 --- a/tsfresh/feature_extraction/feature_calculators.py +++ b/tsfresh/feature_extraction/feature_calculators.py @@ -863,15 +863,15 @@ def first_location_of_minimum(x): @set_property("fctype", "simple") -def percentage_of_reoccurring_datapoints_to_all_datapoints(x): +def percentage_of_reoccurring_values_to_all_values(x): """ - Returns the percentage of unique values, that are present in the time series + Returns the percentage of values that are present in the time series more than once. len(different values occurring more than once) / len(different values) This means the percentage is normalized to the number of unique values, - in contrast to the percentage_of_reoccurring_values_to_all_values. + in contrast to the percentage_of_reoccurring_datapoints_to_all_datapoints. :param x: the time series to calculate the feature of :type x: numpy.ndarray @@ -891,15 +891,15 @@ def percentage_of_reoccurring_datapoints_to_all_datapoints(x): @set_property("fctype", "simple") @set_property("input", "pd.Series") -def percentage_of_reoccurring_values_to_all_values(x): +def percentage_of_reoccurring_datapoints_to_all_datapoints(x): """ - Returns the ratio of unique values, that are present in the time series - more than once. + Returns the percentage of non-unique data points. Non-unique means that they + occur more than once in the time series. 
# of data points occurring more than once / # of all data points This means the ratio is normalized to the number of data points in the time series, - in contrast to the percentage_of_reoccurring_datapoints_to_all_datapoints. + in contrast to the percentage_of_reoccurring_values_to_all_values. :param x: the time series to calculate the feature of :type x: numpy.ndarray