From 3342199556eb8c248940fbff6734d28a36556437 Mon Sep 17 00:00:00 2001 From: Alex Kennedy Date: Wed, 27 Jan 2021 19:48:30 +1300 Subject: [PATCH] Fix minor typos in the docs --- docs/text/faq.rst | 4 ++-- docs/text/feature_extraction_settings.rst | 4 ++-- docs/text/how_to_contribute.rst | 2 +- docs/text/large_data.rst | 5 +++-- docs/text/tsfresh_on_a_cluster.rst | 2 +- tsfresh/convenience/relevant_extraction.py | 2 +- .../feature_extraction/feature_calculators.py | 16 ++++++++-------- tsfresh/feature_selection/relevance.py | 2 +- tsfresh/feature_selection/selection.py | 2 +- tsfresh/transformers/feature_selector.py | 4 ++-- .../transformers/relevant_feature_augmenter.py | 2 +- tsfresh/utilities/dataframe_functions.py | 2 +- tsfresh/utilities/distribution.py | 2 +- 13 files changed, 25 insertions(+), 24 deletions(-) diff --git a/docs/text/faq.rst b/docs/text/faq.rst index 7530c70e0..e988e8dc1 100644 --- a/docs/text/faq.rst +++ b/docs/text/faq.rst @@ -12,8 +12,8 @@ FAQ 2. **Is it possible to extract features from rolling/shifted time series?** - Yes, the :func:`tsfresh.dataframe_functions.roll_time_series` function allows to conviniently create a rolled - time series datframe from your data. You just have to transform your data into one of the supported tsfresh + Yes, the :func:`tsfresh.dataframe_functions.roll_time_series` function allows to conveniently create a rolled + time series dataframe from your data. You just have to transform your data into one of the supported tsfresh :ref:`data-formats-label`. Then, the :func:`tsfresh.dataframe_functions.roll_time_series` give you a DataFrame with the rolled time series, that you can pass to tsfresh. 
diff --git a/docs/text/feature_extraction_settings.rst b/docs/text/feature_extraction_settings.rst index a58337ed8..ca11efaba 100644 --- a/docs/text/feature_extraction_settings.rst +++ b/docs/text/feature_extraction_settings.rst @@ -111,12 +111,12 @@ By using feature selection algorithms you find out that only a subgroup of featu Then, we provide the :func:`tsfresh.feature_extraction.settings.from_columns` method that constructs the `kind_to_fc_parameters` dictionary from the column names of this filtered feature matrix to make sure that only relevant features are extracted. -This can save a huge amount of time because you prevent the calculation of uncessary features. +This can save a huge amount of time because you prevent the calculation of unnecessary features. Let's illustrate that with an example: .. code:: python - # X_tsfresh containes the extracted tsfresh features + # X_tsfresh contains the extracted tsfresh features X_tsfresh = extract_features(...) # which are now filtered to only contain relevant features diff --git a/docs/text/how_to_contribute.rst b/docs/text/how_to_contribute.rst index 0bfe4a346..a63e5cf74 100644 --- a/docs/text/how_to_contribute.rst +++ b/docs/text/how_to_contribute.rst @@ -78,7 +78,7 @@ or build the documentation with The finished documentation can be found in the docs/_build/html folder. On Github we use a Travis CI Folder that runs our test suite every time a commit or pull request is sent. The -configuration of Travi is controlled by the +configuration of Travis is controlled by the `.travis.yml `_ file. diff --git a/docs/text/large_data.rst b/docs/text/large_data.rst index d2e52b849..311ef0e15 100644 --- a/docs/text/large_data.rst +++ b/docs/text/large_data.rst @@ -4,7 +4,8 @@ Large Input Data ================ If you are dealing with large time series data, you are facing multiple problems. 
-Thw two most important ones are +The two most important ones are + * long execution times for feature extraction * large memory consumptions, even beyond what a single machine can handle @@ -79,6 +80,6 @@ No pivoting will be performed in this case. PySpark ------- -Similar to dask, it is also possible to ass the feature extraction into a Spark +Similar to dask, it is also possible to pass the feature extraction into a Spark computation graph. You can find more information in the documentation of :func:`tsfresh.convenience.bindings.spark_feature_extraction_on_chunk`. \ No newline at end of file diff --git a/docs/text/tsfresh_on_a_cluster.rst b/docs/text/tsfresh_on_a_cluster.rst index 7c0c7b22d..6aa74cfd0 100644 --- a/docs/text/tsfresh_on_a_cluster.rst +++ b/docs/text/tsfresh_on_a_cluster.rst @@ -150,7 +150,7 @@ The only thing that you will need to run *tsfresh* on a Dask cluster is the ip a `dask-scheduler `_. Lets say that your dask scheduler is running at ``192.168.0.1:8786``, then we can easily construct a -:class:`~sfresh.utilities.distribution.ClusterDaskDistributor` that connects to the sceduler and distributes the +:class:`~tsfresh.utilities.distribution.ClusterDaskDistributor` that connects to the scheduler and distributes the time series data and the calculation to a cluster: .. code:: python diff --git a/tsfresh/convenience/relevant_extraction.py b/tsfresh/convenience/relevant_extraction.py index 58af586e4..c8d03b7f4 100644 --- a/tsfresh/convenience/relevant_extraction.py +++ b/tsfresh/convenience/relevant_extraction.py @@ -134,7 +134,7 @@ def extract_relevant_features(timeseries_container, y, X=None, :param ml_task: The intended machine learning task. Either `'classification'`, `'regression'` or `'auto'`. Defaults to `'auto'`, meaning the intended task is inferred from `y`. 
- If `y` has a boolean, integer or object dtype, the task is assumend to be classification, + If `y` has a boolean, integer or object dtype, the task is assumed to be classification, else regression. :type ml_task: str diff --git a/tsfresh/feature_extraction/feature_calculators.py b/tsfresh/feature_extraction/feature_calculators.py index 6a876d6ea..5642e51b8 100644 --- a/tsfresh/feature_extraction/feature_calculators.py +++ b/tsfresh/feature_extraction/feature_calculators.py @@ -135,9 +135,9 @@ def _estimate_friedrich_coefficients(x, m, r): :param x: the time series to calculate the feature of :type x: numpy.ndarray - :param m: order of polynom to fit for estimating fixed points of dynamics + :param m: order of polynomial to fit for estimating fixed points of dynamics :type m: int - :param r: number of quantils to use for averaging + :param r: number of quantiles to use for averaging :type r: float :return: coefficients of polynomial of deterministic dynamics @@ -1283,7 +1283,7 @@ def cwt_coefficients(x, param): where :math:`a` is the width parameter of the wavelet function. - This feature calculator takes three different parameter: widths, coeff and w. The feature calculater takes all the + This feature calculator takes three different parameters: widths, coeff and w. The feature calculator takes all the different widths arrays and then calculates the cwt one time for each different width array. Then the values for the different coefficient for coeff and width w are returned. 
(For each dic in param one feature is returned) @@ -1948,8 +1948,8 @@ def friedrich_coefficients(x, param): :param x: the time series to calculate the feature of :type x: numpy.ndarray :param param: contains dictionaries {"m": x, "r": y, "coeff": z} with x being positive integer, - the order of polynom to fit for estimating fixed points of - dynamics, y positive float, the number of quantils to use for averaging and finally z, + the order of polynomial to fit for estimating fixed points of + dynamics, y positive float, the number of quantiles to use for averaging and finally z, a positive integer corresponding to the returned coefficient :type param: list :return: the different feature values @@ -1957,7 +1957,7 @@ def friedrich_coefficients(x, param): """ # calculated is dictionary storing the calculated coefficients {m: {r: friedrich_coefficients}} calculated = defaultdict(dict) - # res is a dictionary containg the results {"m_10__r_2__coeff_3": 15.43} + # res is a dictionary containing the results {"m_10__r_2__coeff_3": 15.43} res = {} for parameter_combination in param: @@ -1996,9 +1996,9 @@ def max_langevin_fixed_point(x, r, m): :param x: the time series to calculate the feature of :type x: numpy.ndarray - :param m: order of polynom to fit for estimating fixed points of dynamics + :param m: order of polynomial to fit for estimating fixed points of dynamics :type m: int - :param r: number of quantils to use for averaging + :param r: number of quantiles to use for averaging :type r: float :return: Largest fixed point of deterministic dynamics diff --git a/tsfresh/feature_selection/relevance.py b/tsfresh/feature_selection/relevance.py index 8aacc00f2..46e0f8586 100644 --- a/tsfresh/feature_selection/relevance.py +++ b/tsfresh/feature_selection/relevance.py @@ -93,7 +93,7 @@ def calculate_relevance_table( :param ml_task: The intended machine learning task. Either `'classification'`, `'regression'` or `'auto'`. 
Defaults to `'auto'`, meaning the intended task is inferred from `y`. - If `y` has a boolean, integer or object dtype, the task is assumend to be classification, + If `y` has a boolean, integer or object dtype, the task is assumed to be classification, else regression. :type ml_task: str diff --git a/tsfresh/feature_selection/selection.py b/tsfresh/feature_selection/selection.py index 126f48fbd..af2ecffef 100644 --- a/tsfresh/feature_selection/selection.py +++ b/tsfresh/feature_selection/selection.py @@ -125,7 +125,7 @@ def select_features( :param ml_task: The intended machine learning task. Either `'classification'`, `'regression'` or `'auto'`. Defaults to `'auto'`, meaning the intended task is inferred from `y`. - If `y` has a boolean, integer or object dtype, the task is assumend to be classification, + If `y` has a boolean, integer or object dtype, the task is assumed to be classification, else regression. :type ml_task: str diff --git a/tsfresh/transformers/feature_selector.py b/tsfresh/transformers/feature_selector.py index 9a4dce6f1..0558f11b5 100644 --- a/tsfresh/transformers/feature_selector.py +++ b/tsfresh/transformers/feature_selector.py @@ -106,7 +106,7 @@ def __init__( :param ml_task: The intended machine learning task. Either `'classification'`, `'regression'` or `'auto'`. Defaults to `'auto'`, meaning the intended task is inferred from `y`. - If `y` has a boolean, integer or object dtype, the task is assumend to be classification, + If `y` has a boolean, integer or object dtype, the task is assumed to be classification, else regression. :type ml_task: str @@ -150,7 +150,7 @@ def __init__( def fit(self, X, y): """ - Extract the information, which of the features are relevent using the given target. + Extract the information, which of the features are relevant using the given target. For more information, please see the :func:`~tsfresh.festure_selection.festure_selector.check_fs_sig_bh` function. 
All columns in the input data sample are treated as feature. The index of all diff --git a/tsfresh/transformers/relevant_feature_augmenter.py b/tsfresh/transformers/relevant_feature_augmenter.py index 396bf3a12..7eb1c69ed 100644 --- a/tsfresh/transformers/relevant_feature_augmenter.py +++ b/tsfresh/transformers/relevant_feature_augmenter.py @@ -191,7 +191,7 @@ def __init__( :param ml_task: The intended machine learning task. Either `'classification'`, `'regression'` or `'auto'`. Defaults to `'auto'`, meaning the intended task is inferred from `y`. - If `y` has a boolean, integer or object dtype, the task is assumend to be classification, + If `y` has a boolean, integer or object dtype, the task is assumed to be classification, else regression. :type ml_task: str diff --git a/tsfresh/utilities/dataframe_functions.py b/tsfresh/utilities/dataframe_functions.py index 7fc3a43f9..de91c1453 100644 --- a/tsfresh/utilities/dataframe_functions.py +++ b/tsfresh/utilities/dataframe_functions.py @@ -584,7 +584,7 @@ def add_sub_time_series_index(df_or_dict, sub_length, column_id=None, column_sor - if column_id is None: for each kind (or if column_kind is None for the full dataframe) a new index built by "sub-packaging" the data in packages of length "sub_length". For example if you have data with the length of 11 and sub_length is 2, you will get 6 new packages: 0, 0; 1, 1; 2, 2; 3, 3; 4, 4; 5. - - if column_id is not None: the same as before, just for each id seperately. The old column_id values are added + - if column_id is not None: the same as before, just for each id separately. The old column_id values are added to the new "id" column after a comma You can use this functions to turn a long measurement into sub-packages, where you want to extract features on. 
diff --git a/tsfresh/utilities/distribution.py b/tsfresh/utilities/distribution.py index 17f720f88..94dc66787 100644 --- a/tsfresh/utilities/distribution.py +++ b/tsfresh/utilities/distribution.py @@ -346,7 +346,7 @@ class ClusterDaskDistributor(IterableDistributorBaseClass): def __init__(self, address): """ - Sets up a distributor that connects to a Dask Scheduler to distribute the calculaton of the features + Sets up a distributor that connects to a Dask Scheduler to distribute the calculation of the features :param address: the ip address and port number of the Dask Scheduler :type address: str