Merge branch 'main' into imbalance
TonyBagnall committed Feb 2, 2025
2 parents 770ea75 + 00e6999 commit 460a378
Showing 19 changed files with 559 additions and 237 deletions.
9 changes: 9 additions & 0 deletions .all-contributorsrc
@@ -2647,6 +2647,15 @@
"contributions": [
"code"
]
},
{
"login": "Akhil-Jasson",
"name": "Akhil Jasson",
"avatar_url": "https://avatars.githubusercontent.com/u/114808672?v=4",
"profile": "https://react-portfolio-git-main-akhil-jassons-projects.vercel.app/",
"contributions": [
"doc"
]
}
],
"commitType": "docs"
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
args: [ "--create", "--python-folders", "aeon" ]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.2
rev: v0.9.3
hooks:
- id: ruff
args: [ "--fix"]
65 changes: 33 additions & 32 deletions CONTRIBUTORS.md

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions aeon/classification/convolution_based/_arsenal.py
@@ -130,15 +130,15 @@ class Arsenal(BaseClassifier):

def __init__(
self,
n_kernels=2000,
n_estimators=25,
rocket_transform="rocket",
max_dilations_per_kernel=32,
n_features_per_kernel=4,
time_limit_in_minutes=0.0,
contract_max_n_estimators=100,
n_kernels: int = 2000,
n_estimators: int = 25,
rocket_transform: str = "rocket",
max_dilations_per_kernel: int = 32,
n_features_per_kernel: int = 4,
time_limit_in_minutes: float = 0.0,
contract_max_n_estimators: int = 100,
class_weight=None,
n_jobs=1,
n_jobs: int = 1,
random_state=None,
):
self.n_kernels = n_kernels
7 changes: 6 additions & 1 deletion aeon/classification/convolution_based/_hydra.py
@@ -96,7 +96,12 @@ class HydraClassifier(BaseClassifier):
}

def __init__(
self, n_kernels=8, n_groups=64, class_weight=None, n_jobs=1, random_state=None
self,
n_kernels: int = 8,
n_groups: int = 64,
class_weight=None,
n_jobs: int = 1,
random_state=None,
):
self.n_kernels = n_kernels
self.n_groups = n_groups
6 changes: 3 additions & 3 deletions aeon/classification/convolution_based/_minirocket.py
@@ -89,11 +89,11 @@ class MiniRocketClassifier(BaseClassifier):

def __init__(
self,
n_kernels=10000,
max_dilations_per_kernel=32,
n_kernels: int = 10000,
max_dilations_per_kernel: int = 32,
estimator=None,
class_weight=None,
n_jobs=1,
n_jobs: int = 1,
random_state=None,
):
self.n_kernels = n_kernels
16 changes: 11 additions & 5 deletions aeon/classification/convolution_based/_mr_hydra.py
@@ -29,17 +29,18 @@ class MultiRocketHydraClassifier(BaseClassifier):
Number of kernels per group for the Hydra transform.
n_groups : int, default=64
Number of groups per dilation for the Hydra transform.
class_weight : {“balanced”, “balanced_subsample”}, dict or list of dicts, default=None
class_weight : {None, “balanced”}, dict or list of dicts, default=None
From sklearn documentation:
If not given, all classes are supposed to have weight one.
If None, all classes are assigned equal weights.
The “balanced” mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as
n_samples / (n_classes * np.bincount(y))
The “balanced_subsample” mode is the same as “balanced” except that weights
are computed based on the bootstrap sample for every tree grown.
For multi-output, the weights of each column of y will be multiplied.
A dictionary can also be provided to specify weights for each class manually.
Note that these weights will be multiplied with sample_weight (passed through
the fit method) if sample_weight is specified.
Note: "balanced_subsample" is not supported as RidgeClassifierCV
is not an ensemble model.
n_jobs : int, default=1
The number of jobs to run in parallel for both `fit` and `predict`.
``-1`` means using all processors.
@@ -88,7 +89,12 @@ class MultiRocketHydraClassifier(BaseClassifier):
}

def __init__(
self, n_kernels=8, n_groups=64, class_weight=None, n_jobs=1, random_state=None
self,
n_kernels: int = 8,
n_groups: int = 64,
class_weight=None,
n_jobs: int = 1,
random_state=None,
):
self.n_kernels = n_kernels
self.n_groups = n_groups
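
The “balanced” formula quoted in the class_weight description above is easy to verify by hand. A minimal sketch in plain NumPy (the helper name and labels are hypothetical, not part of aeon or sklearn):

import numpy as np

def balanced_class_weights(y):
    # sklearn's "balanced" heuristic: n_samples / (n_classes * np.bincount(y))
    classes, counts = np.unique(y, return_counts=True)
    weights = len(y) / (len(classes) * counts)
    return {int(c): float(w) for c, w in zip(classes, weights)}

# Hypothetical imbalanced labels: 8 samples of class 0, 2 of class 1
print(balanced_class_weights(np.array([0] * 8 + [1] * 2)))
# {0: 0.625, 1: 2.5} -- the minority class is upweighted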
8 changes: 4 additions & 4 deletions aeon/classification/convolution_based/_multirocket.py
@@ -90,12 +90,12 @@ class MultiRocketClassifier(BaseClassifier):

def __init__(
self,
n_kernels=10000,
max_dilations_per_kernel=32,
n_features_per_kernel=4,
n_kernels: int = 10000,
max_dilations_per_kernel: int = 32,
n_features_per_kernel: int = 4,
estimator=None,
class_weight=None,
n_jobs=1,
n_jobs: int = 1,
random_state=None,
):
self.n_kernels = n_kernels
4 changes: 2 additions & 2 deletions aeon/classification/convolution_based/_rocket.py
@@ -93,10 +93,10 @@ class RocketClassifier(BaseClassifier):

def __init__(
self,
n_kernels=10000,
n_kernels: int = 10000,
estimator=None,
class_weight=None,
n_jobs=1,
n_jobs: int = 1,
random_state=None,
):
self.n_kernels = n_kernels
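
The convolution-based diffs above add type hints only, so runtime behaviour is unchanged. A minimal fit/predict sketch on hypothetical toy data, using the constructor defaults shown in the diff:

import numpy as np
from aeon.classification.convolution_based import RocketClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 1, 100))  # hypothetical collection: 20 univariate series of length 100
y = np.repeat([0, 1], 10)

clf = RocketClassifier(n_kernels=10000, n_jobs=1, random_state=0)
clf.fit(X, y)
print(clf.predict(X[:2]))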
20 changes: 8 additions & 12 deletions aeon/classification/deep_learning/_lite_time.py
@@ -35,12 +35,10 @@ class LITETimeClassifier(BaseClassifier):
if set to `True` then LITEMV is used. LITEMV is the
same architecture as LITE but specifically designed
to better handle multivariate time series.
n_filters : int or list of int32, default = 32
The number of filters used in one lite layer, if not a list, the same
number of filters is used in all lite layers.
kernel_size : int or list of int, default = 40
The head kernel size used for each lite layer, if not a list, the same
is used in all lite module.
n_filters : int, default = 32
The number of filters used in one lite layer.
kernel_size : int, default = 40
The head kernel size used for each lite layer.
strides : int or list of int, default = 1
The strides of kernels in convolution layers for each lite layer,
if not a list, the same is used in all lite layers.
@@ -340,12 +338,10 @@ class IndividualLITEClassifier(BaseDeepClassifier):
if set to `True` then LITEMV is used. LITEMV is the
same architecture as LITE but specifically designed
to better handle multivariate time series.
n_filters : int or list of int32, default = 32
The number of filters used in one lite layer, if not a list, the same
number of filters is used in all lite layers.
kernel_size : int or list of int, default = 40
The head kernel size used for each lite layer, if not a list, the same
is used in all lite layers.
n_filters : int, default = 32
The number of filters used in one lite layer.
kernel_size : int, default = 40
The head kernel size used for each lite layer.
strides : int or list of int, default = 1
The strides of kernels in convolution layers for each lite layer,
if not a list, the same is used in all lite layers.
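
Per the corrected docstrings above, n_filters and kernel_size are single ints applied to every LITE layer. A minimal construction sketch under that assumption:

from aeon.classification.deep_learning import LITETimeClassifier

# n_filters and kernel_size are plain ints, as the corrected docstring states;
# strides may still be an int or a list of ints, one per layer.
clf = LITETimeClassifier(n_filters=32, kernel_size=40, strides=1)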
37 changes: 30 additions & 7 deletions aeon/classification/deep_learning/_mlp.py
@@ -21,6 +21,18 @@ class MLPClassifier(BaseDeepClassifier):
Parameters
----------
n_layers : int, optional (default=3)
The number of dense layers in the MLP.
n_units : Union[int, List[int]], optional (default=500)
Number of units in each dense layer.
activation : Union[str, List[str]], optional (default='relu')
Activation function(s) for each dense layer.
dropout_rate : Union[float, List[Union[int, float]]], optional (default=None)
Dropout rate(s) for each dense layer. If None, a default rate of 0.2 is
used for every layer except the first, which uses 0.1. Dropout rates
lie in the interval [0, 1].
dropout_last : float, default = 0.3
The dropout rate of the last layer.
use_bias : bool, default = True
Condition on whether or not to use bias values for dense layers.
n_epochs : int, default = 2000
@@ -76,10 +88,6 @@ class MLPClassifier(BaseDeepClassifier):
a single string metric is provided, it will be
used as the only metric. If a list of metrics are
provided, all will be used for evaluation.
activation : string or a tf callable, default="sigmoid"
Activation function used in the output linear layer.
List of available activation functions:
https://keras.io/api/layers/activations/
Notes
-----
@@ -104,6 +112,11 @@ class MLPClassifier(BaseDeepClassifier):

def __init__(
self,
n_layers=3,
n_units=500,
activation="relu",
dropout_rate=None,
dropout_last=None,
use_bias=True,
n_epochs=2000,
batch_size=16,
@@ -120,16 +133,19 @@ def __init__(
last_file_name="last_model",
init_file_name="init_model",
random_state=None,
activation="sigmoid",
optimizer=None,
):
self.n_layers = n_layers
self.n_units = n_units
self.activation = activation
self.dropout_rate = dropout_rate
self.dropout_last = dropout_last
self.callbacks = callbacks
self.n_epochs = n_epochs
self.verbose = verbose
self.loss = loss
self.metrics = metrics
self.use_mini_batch_size = use_mini_batch_size
self.activation = activation
self.use_bias = use_bias
self.file_path = file_path
self.save_best_model = save_best_model
@@ -147,7 +163,14 @@ def __init__(
last_file_name=last_file_name,
)

self._network = MLPNetwork(use_bias=self.use_bias)
self._network = MLPNetwork(
n_layers=self.n_layers,
n_units=self.n_units,
activation=self.activation,
dropout_rate=self.dropout_rate,
dropout_last=self.dropout_last,
use_bias=self.use_bias,
)

def build_model(self, input_shape, n_classes, **kwargs):
"""Construct a compiled, un-trained, keras model that is ready for training.
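
The diff above threads the new constructor arguments straight into MLPNetwork. A minimal construction sketch, assuming list-valued arguments are accepted per layer as the Union[int, List[int]] annotations in the docstring suggest:

from aeon.classification.deep_learning import MLPClassifier

# Three dense layers; dropout 0.1 on the first layer and 0.2 on the rest
# mirrors the documented default for dropout_rate=None. dropout_last applies
# to the final dense layer separately.
clf = MLPClassifier(
    n_layers=3,
    n_units=[500, 500, 500],
    activation=["relu", "relu", "relu"],
    dropout_rate=[0.1, 0.2, 0.2],
    dropout_last=0.3,
)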
29 changes: 14 additions & 15 deletions aeon/classification/dictionary_based/_redcomets.py
@@ -18,7 +18,7 @@

from aeon.classification.base import BaseClassifier
from aeon.transformations.collection import Normalizer
from aeon.transformations.collection.dictionary_based import SAX, SFA
from aeon.transformations.collection.dictionary_based import SAX, SFAFast
from aeon.utils.validation._dependencies import _check_soft_dependencies


@@ -62,7 +62,7 @@ class REDCOMETS(BaseClassifier):
See Also
--------
SAX, SFA
SAX, SFA, SFAFast
Notes
-----
@@ -182,9 +182,9 @@ def _build_univariate_ensemble(self, X, y):
Returns
-------
sfa_transforms :
List of ``SFA()`` instances with random word length and alphabet size
List of ``SFAFast()`` instances with random word length and alphabet size
sfa_clfs :
List of ``(RandomForestClassifier(), weight)`` tuples fitted on `SFA`
List of ``(RandomForestClassifier(), weight)`` tuples fitted on `SFAFast`
transformed training data
sax_transforms :
List of ``SAX()`` instances with random word length and alphabet size
@@ -241,7 +241,7 @@ def _build_univariate_ensemble(self, X, y):
cv = np.min([5, len(y_smote) // len(list(set(y_smote)))])

sfa_transforms = [
SFA(
SFAFast(
word_length=w,
alphabet_size=a,
window_size=X_smote.shape[1],
@@ -254,8 +254,9 @@

sfa_clfs = []
for sfa in sfa_transforms:
sfa_dics = sfa.fit_transform(X_smote, y_smote)
X_sfa = np.array([sfa.word_list(list(d.keys())[0]) for d in sfa_dics[0]])
sfa.fit(X_smote, y_smote)
sfa_dics = sfa.transform_words(X_smote)
X_sfa = sfa_dics[:, 0, :]

rf = RandomForestClassifier(
n_estimators=self.n_trees,
@@ -318,11 +319,11 @@ def _build_dimension_ensemble(self, X, y):
Returns
-------
sfa_transforms : list
List of lists of ``SFA()`` instances with random word length and alphabet
List of lists of ``SFAFast()`` instances with random word length and alphabet
size
sfa_clfs : list
List of lists of ``(RandomForestClassifier(), weight)`` tuples fitted on
`SFA` transformed training data
`SFAFast` transformed training data
sax_transforms : list
List of lists of ``SAX()`` instances with random word length and alphabet
size
@@ -416,8 +417,8 @@ def _predict_proba_unvivariate(self, X) -> np.ndarray:
pred_mat = np.zeros((X.shape[0], self.n_classes_))

for sfa, (rf, weight) in zip(self.sfa_transforms, self.sfa_clfs):
sfa_dics = sfa.transform(X)
X_sfa = np.array([sfa.word_list(list(d.keys())[0]) for d in sfa_dics[0]])
sfa_dics = sfa.transform_words(X)
X_sfa = sfa_dics[:, 0, :]

rf_pred_mat = rf.predict_proba(X_sfa)

@@ -471,10 +472,8 @@ def _predict_proba_dimension_ensemble(self, X) -> np.ndarray:
if self.variant in [6, 7, 8, 9]:
dimension_pred_mats = None
for sfa, (rf, _) in zip(sfa_transforms, sfa_clfs):
sfa_dics = sfa.transform(X_d)
X_sfa = np.array(
[sfa.word_list(list(d.keys())[0]) for d in sfa_dics[0]]
)
sfa_dics = sfa.transform_words(X_d)
X_sfa = sfa_dics[:, 0, :]

rf_pred_mat = rf.predict_proba(X_sfa)

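
The REDCOMETS changes swap SFA for SFAFast and replace the dictionary-based word extraction with the array returned by transform_words. A before/after sketch on hypothetical toy data, using only the calls visible in the diff (constructor arguments hidden behind the fold may also be required):

import numpy as np
from aeon.transformations.collection.dictionary_based import SFAFast

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 1, 50))  # hypothetical: 20 univariate series of length 50
y = np.repeat([0, 1], 10)

sfa = SFAFast(word_length=8, alphabet_size=4, window_size=50)

# Old SFA pattern (removed in this diff):
#   sfa_dics = sfa.fit_transform(X, y)
#   X_sfa = np.array([sfa.word_list(list(d.keys())[0]) for d in sfa_dics[0]])

# New SFAFast pattern:
sfa.fit(X, y)
words = sfa.transform_words(X)  # per-case array of words
X_sfa = words[:, 0, :]          # first word per case, as in the diff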