Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Convert DWTTransformer to use numpy format internally #293

Merged
merged 2 commits into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 32 additions & 50 deletions aeon/transformations/panel/dwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import math

import numpy as np
import pandas as pd

from aeon.datatypes import convert
from aeon.transformations.base import BaseTransformer

__author__ = "Vincent Nicholson"
Expand All @@ -18,73 +16,57 @@ class DWTTransformer(BaseTransformer):

Parameters
----------
num_levels : int, number of levels to perform the Haar wavelet
n_levels : int, number of levels to perform the Haar wavelet
transformation.

Examples
--------
>>> from aeon.transformations.panel import DWTTransformer
>>> import numpy as np
>>> data = np.array([[[1,2,3,4,5,6,7,8,9,10]],[[5,5,5,5,5,5,5,5,5,5]]])
>>> dwt = DWTTransformer(n_levels=2)
>>> data2 = dwt.fit_transform(data)

"""

_tags = {
"scitype:transform-input": "Series",
# what is the scitype of X: Series, or Panel
"scitype:transform-output": "Series",
# what scitype is returned: Primitives, Series, Panel
"scitype:instancewise": False, # is this an instance-wise transform?
"X_inner_mtype": "nested_univ", # which mtypes do _fit/_predict support for X?
"y_inner_mtype": "None", # which mtypes do _fit/_predict support for X?
"scitype:instancewise": False,
"X_inner_mtype": "numpy3D",
"y_inner_mtype": "None",
"fit_is_empty": True,
}

def __init__(self, num_levels=3):
self.num_levels = num_levels
def __init__(self, n_levels=3):
self.n_levels = n_levels
super(DWTTransformer, self).__init__()

def _transform(self, X, y=None):
"""Transform X and return a transformed version.

private _transform containing core logic, called from transform

Parameters
----------
X : nested pandas DataFrame of shape [n_instances, n_features]
each cell of X must contain pandas.Series
Data to fit transform to
X : 3D np.ndarray of shape = [n_instances, n_channels, series_length]
collection of time series to transform
y : ignored argument for interface compatibility
Additional data, e.g., labels for transformation

Returns
-------
Xt : nested pandas DataFrame of shape [n_instances, n_features]
each cell of Xt contains pandas.Series
transformed version of X
Xt : 3D np.ndarray of shape = [n_instances, n_channels, series_length]
collection of transformed time series
"""
n_instances, n_channels, n_timepoints = X.shape
_X = np.swapaxes(X, 0, 1)
self._check_parameters()

# Get information about the dataframe
col_names = X.columns

Xt = pd.DataFrame()
for x in col_names:
# Convert one of the columns in the dataframe to numpy array
arr = convert(
pd.DataFrame(X[x]),
from_type="nested_univ",
to_type="numpyflat",
as_scitype="Panel",
)

transformedData = self._extract_wavelet_coefficients(arr)

# Convert to a numpy array
transformedData = np.asarray(transformedData)

# Add it to the dataframe
colToAdd = []
for i in range(len(transformedData)):
inst = transformedData[i]
colToAdd.append(pd.Series(inst))

Xt[x] = colToAdd
# On each channel, extract the Haar wavelet (DWT) coefficients
channels = []
for i in range(n_channels):
channels.append(self._extract_wavelet_coefficients(_X[i]))
result = np.array(channels)
result = np.swapaxes(result, 0, 1)

return Xt
return result

def _extract_wavelet_coefficients(self, data):
"""Extract wavelet coefficients of a 2d array of time series.
Expand All @@ -93,7 +75,7 @@ def _extract_wavelet_coefficients(self, data):
from levels 1 to num_levels followed by the approximation
coefficients of the highest level.
"""
num_levels = self.num_levels
num_levels = self.n_levels
res = []

for x in data:
Expand Down Expand Up @@ -123,14 +105,14 @@ def _check_parameters(self):
------
ValueError or TypeError if a parameters input is invalid.
"""
if isinstance(self.num_levels, int):
if self.num_levels <= -1:
if isinstance(self.n_levels, int):
if self.n_levels <= -1:
raise ValueError("num_levels must have the value" + "of at least 0")
else:
raise TypeError(
"num_levels must be an 'int'. Found"
+ "'"
+ type(self.num_levels).__name__
+ type(self.n_levels).__name__
+ "' instead."
)

Expand Down
156 changes: 56 additions & 100 deletions aeon/transformations/panel/tests/test_dwt.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,152 +2,108 @@
import math

import numpy as np
import pandas as pd
import pytest

from aeon.transformations.panel.dwt import DWTTransformer
from aeon.utils._testing.panel import _make_nested_from_array


# Check that an exception is raised for an invalid number of levels.
# Invalid input types tested: string, float, negative int, negative float, empty dict.
# A valid input is a non-negative integer (0 or more).
@pytest.mark.parametrize("bad_num_levels", ["str", 1.2, -1.2, -1, {}])
def test_bad_input_args(bad_num_levels):
X = _make_nested_from_array(np.ones(10), n_instances=10, n_columns=1)
X = np.ones(shape=(10, 1, 10))

if not isinstance(bad_num_levels, int):
with pytest.raises(TypeError):
DWTTransformer(num_levels=bad_num_levels).fit(X).transform(X)
DWTTransformer(n_levels=bad_num_levels).fit_transform(X)
else:
with pytest.raises(ValueError):
DWTTransformer(num_levels=bad_num_levels).fit(X).transform(X)
DWTTransformer(n_levels=bad_num_levels).fit(X).transform(X)


# Check the transformer has changed the data correctly.
def test_output_of_transformer():
X = _make_nested_from_array(
np.array([4, 6, 10, 12, 8, 6, 5, 5]), n_instances=1, n_columns=1
)
X = np.array([[[4, 6, 10, 12, 8, 6, 5, 5]]])

d = DWTTransformer(num_levels=2).fit(X)
d = DWTTransformer(n_levels=2).fit(X)
res = d.transform(X)
orig = convert_list_to_dataframe(
[[16, 12, -6, 2, -math.sqrt(2), -math.sqrt(2), math.sqrt(2), 0]]
)
orig.columns = X.columns
assert check_if_dataframes_are_equal(res, orig)

X = _make_nested_from_array(
np.array([-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3]), n_instances=1, n_columns=1
)
orig = np.array([[[16, 12, -6, 2, -math.sqrt(2), -math.sqrt(2), math.sqrt(2), 0]]])
np.testing.assert_array_almost_equal(res, orig)
X = np.array([[[-5, 2.5, 1, 3, 10, -1.5, 6, 12, -3]]])
d = d.fit(X)
res = d.transform(X)
orig = convert_list_to_dataframe(
orig = np.array(
[
[
0.75000,
13.25000,
-3.25000,
-4.75000,
-5.303301,
-1.414214,
8.131728,
-4.242641,
[
0.75000,
13.25000,
-3.25000,
-4.75000,
-5.303301,
-1.414214,
8.131728,
-4.242641,
]
]
]
)
# These are equivalent but cannot exactly test if two floats are equal
# res.iloc[0,0]
# orig.iloc[0,0]
# assert check_if_dataframes_are_equal(res,orig)
np.testing.assert_array_almost_equal(res, orig)


# This is to test that if n_levels = 0 then no change occurs.
def test_no_levels_does_no_change():
X = _make_nested_from_array(
np.array([1, 2, 3, 4, 5, 56]), n_instances=1, n_columns=1
)
d = DWTTransformer(num_levels=0).fit(X)
X = np.array([[[1, 2, 3, 4, 5, 56]]])
d = DWTTransformer(n_levels=0).fit(X)
res = d.transform(X)
assert check_if_dataframes_are_equal(res, X)
np.testing.assert_array_almost_equal(res, X)


@pytest.mark.parametrize("num_levels,corr_series_length", [(2, 12), (3, 11), (4, 12)])
def test_output_dimensions(num_levels, corr_series_length):
X = _make_nested_from_array(np.ones(13), n_instances=10, n_columns=1)

d = DWTTransformer(num_levels=num_levels).fit(X)
X = np.ones(shape=(10, 1, 13))
d = DWTTransformer(n_levels=num_levels).fit(X)
res = d.transform(X)

# get the dimension of the generated dataframe.
act_time_series_length = res.iloc[0, 0].shape[0]
num_rows = res.shape[0]
num_cols = res.shape[1]
n_cases, n_channels, series_length = res.shape

assert act_time_series_length == corr_series_length
assert num_rows == 10
assert num_cols == 1
assert series_length == corr_series_length
assert n_cases == 10
assert n_channels == 1


# This is to check that DWT produces the same result along each dimension
def test_dwt_performs_correcly_along_each_dim():
X = _make_nested_from_array(
np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), n_instances=1, n_columns=2
)

d = DWTTransformer(num_levels=3).fit(X)
X = np.array([[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]])
d = DWTTransformer(n_levels=3).fit(X)
res = d.transform(X)
orig = convert_list_to_dataframe(
orig = np.array(
[
[
9 * math.sqrt(2),
-4 * math.sqrt(2),
-2,
-2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
],
[
9 * math.sqrt(2),
-4 * math.sqrt(2),
-2,
-2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
],
[
9 * math.sqrt(2),
-4 * math.sqrt(2),
-2,
-2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
],
[
9 * math.sqrt(2),
-4 * math.sqrt(2),
-2,
-2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
-math.sqrt(2) / 2,
],
]
]
)
orig.columns = X.columns
assert check_if_dataframes_are_equal(res, orig)


def convert_list_to_dataframe(list_to_convert):
# Convert this into a panda's data frame
df = pd.DataFrame()
for i in range(len(list_to_convert)):
inst = list_to_convert[i]
data = []
data.append(pd.Series(inst))
df[i] = data

return df


def check_if_dataframes_are_equal(df1, df2):
"""
for some reason, this is how you check that two dataframes are equal.
"""
from pandas.testing import assert_frame_equal

try:
assert_frame_equal(df1, df2)
return True
except AssertionError:
return False
np.testing.assert_array_almost_equal(res, orig)