From 0634634857ea4cca9e9bdb3f6244eb13afcae2f9 Mon Sep 17 00:00:00 2001 From: Stijn Date: Wed, 17 Jan 2024 12:10:24 +0100 Subject: [PATCH] Remove scipy dependency. --- frm/_frm_py/preprocessing.py | 8 +++----- pyproject.toml | 1 - tests/test_data.py | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/frm/_frm_py/preprocessing.py b/frm/_frm_py/preprocessing.py index e5a9c85..de31c2d 100644 --- a/frm/_frm_py/preprocessing.py +++ b/frm/_frm_py/preprocessing.py @@ -4,9 +4,7 @@ """ from typing import Union -from scipy.stats import zscore -from numpy import nan_to_num - +import numpy as np # https://stackoverflow.com/a/60617044 numeric = Union[int, float] @@ -61,9 +59,9 @@ def standardise(timeseries: list[list]): Database of standardised time series. """ try: - return nan_to_num(zscore(timeseries, axis=1)) + return np.nan_to_num((timeseries - np.mean(timeseries, axis=1, keepdims=True)) / np.std(timeseries, axis=1, keepdims=True)) except ValueError: - return [nan_to_num(zscore(ts)) for ts in timeseries] + return [np.nan_to_num((ts - np.nanmean(ts)) / np.nanstd(ts)) for ts in timeseries] # Defined in Lin, Keogh, Linardi, & Chiu (2003). A Symbolic Representation of Time Series, diff --git a/pyproject.toml b/pyproject.toml index ae9276a..d1c1820 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ classifiers = [ ] dependencies = [ "numpy", - "scipy", ] [project.optional-dependencies] diff --git a/tests/test_data.py b/tests/test_data.py index 3249682..b7e3193 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -1,5 +1,4 @@ import numpy as np -from scipy.stats import zscore ts = [ [0, 1, 2, 2, 1, 0], @@ -61,4 +60,4 @@ ] np.random.seed(0) -data = [zscore(np.random.random(np.random.randint(10, 1000))).tolist() for _ in range(100)] +data = [np.random.random(np.random.randint(10, 1000)).tolist() for _ in range(100)]