Skip to content

Commit

Permalink
fixing inheritance
Browse files Browse the repository at this point in the history
  • Loading branch information
tvdboom committed Feb 28, 2024
1 parent d2ad150 commit 74a1b8f
Show file tree
Hide file tree
Showing 23 changed files with 581 additions and 395 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,28 @@ jobs:
examples-tests:
runs-on: ubuntu-latest
steps:
- name: Free disk space
run: |
echo "=============================================="
echo "Freeing up disk space on CI system"
echo "=============================================="
echo "Listing 100 largest packages"
dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100
df -h
echo "Removing large packages"
sudo apt-get remove -y '^ghc-8.*'
sudo apt-get remove -y '^dotnet-.*'
sudo apt-get remove -y '^llvm-.*'
sudo apt-get remove -y 'php.*'
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel
sudo apt-get autoremove -y
sudo apt-get clean
df -h
echo "Removing large directories"
# deleting 15GB
rm -rf /usr/share/dotnet/
df -h
- name: Check out source repository
uses: actions/checkout@v3
- name: Set up Python environment
Expand Down
14 changes: 5 additions & 9 deletions atom/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -1273,7 +1273,7 @@ def _add_transformer(
self._log(f"Fitting {transformer_c.__class__.__name__}...", 1)

# Memoize the fitted transformer_c for repeated instantiations of atom
fit = self._memory.cache(fit_one)
fit = self.memory.cache(fit_one)
kwargs = {
"estimator": transformer_c,
"X": self.branch.X_train,
Expand All @@ -1283,11 +1283,8 @@ def _add_transformer(

# Check whether the fitted estimator is retrieved from cache to inform
# the user; otherwise, the lack of printed messages might be confusing
if self.memory.location is not None:
if fit._is_in_cache_and_valid([*fit._get_output_identifiers(**kwargs)]):
self._log(
f"Retrieving cached results for {transformer_c.__class__.__name__}...", 1
)
if fit.check_call_in_cache(**kwargs):
self._log(f"Loading cached results for {transformer_c.__class__.__name__}...", 1)

transformer_c = fit(**kwargs)

Expand All @@ -1306,7 +1303,6 @@ def _add_transformer(
self.branch.X_train if X is None else X,
self.branch.y_train if y is None else y,
)

else:
X, y = self.pipeline._mem_transform(transformer_c, self.branch.X, self.branch.y)
data = merge(self.branch.X if X is None else X, self.branch.y if y is None else y)
Expand Down Expand Up @@ -1543,8 +1539,8 @@ def balance(self, strategy: str | Estimator = "adasyn", **kwargs):
!!! warning
* The balance method does not support [multioutput tasks][].
* This transformation is only applied to the training set
in order to maintain the original distribution of target
classes in the test set.
to maintain the original distribution of target classes
in the test set.
!!! tip
Use atom's [classes][self-classes] attribute for an overview
Expand Down
112 changes: 77 additions & 35 deletions atom/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,16 @@ def __init__(
self._bootstrap: pd.DataFrame | None = None
self._time_bootstrap = 0.0

# Inject goal-specific methods from ClassRegModel or ForecastModel
cls = ForecastModel if goal is Goal.forecast else ClassRegModel
for n, m in vars(cls).items():
if not n.startswith("__"):
try:
setattr(self, n, m.__get__(self, cls))
except AttributeError:
# available_if descriptor raises an error
# if the estimator doesn't have the method
pass
# Inject goal-specific methods from ForecastModel
if goal is Goal.forecast and ClassRegModel in self.__class__.__bases__:
for n, m in vars(ForecastModel).items():
if not n.startswith("__"):
try:
setattr(self, n, m.__get__(self, ForecastModel))
except AttributeError:
# available_if descriptor raises an error
# if the estimator doesn't have the method
pass

# Skip this part if only initialized for the estimator
if branches:
Expand Down Expand Up @@ -2497,7 +2497,7 @@ def transform(
return pl.transform(Xt, yt)


class ClassRegModel:
class ClassRegModel(BaseModel):
"""Classification and regression models."""

@crash
Expand Down Expand Up @@ -2612,15 +2612,15 @@ def _prediction(
"""

def get_transform_X_y(
X: RowSelector | XSelector,
X: XSelector,
y: YSelector | None,
) -> tuple[pd.DataFrame, Pandas | None]:
"""Get X and y from the pipeline transformation.
Parameters
----------
X: hashable, segment, sequence or dataframe-like
Feature set. If not dataframe-like, expected to fail.
X: dataframe-like
Feature set.
y: int, str, sequence, dataframe-like or None
Target column(s) corresponding to `X`.
Expand Down Expand Up @@ -2664,12 +2664,13 @@ def assign_prediction_columns() -> list[str]:
# prediction calls from dataframes with reset indices
Xt, yt = get_transform_X_y(X, y)
else:
Xt, yt = self.branch._get_rows(X, return_X_y=True)
Xt, yt = self.branch._get_rows(X, return_X_y=True) # type: ignore[call-overload]

if self.scaler:
Xt = self.scaler.transform(Xt)
Xt = cast(pd.DataFrame, self.scaler.transform(Xt))

except Exception: # noqa: BLE001
Xt, yt = get_transform_X_y(X, y)
Xt, yt = get_transform_X_y(X, y) # type: ignore[arg-type]

if method != "score":
pred = np.array(self.memory.cache(getattr(self.estimator, method))(Xt[self.features]))
Expand Down Expand Up @@ -2705,7 +2706,7 @@ def assign_prediction_columns() -> list[str]:
scorer=scorer,
estimator=self.estimator,
X=Xt,
y=yt,
y=yt, # type: ignore[arg-type]
sample_weight=sample_weight,
)

Expand Down Expand Up @@ -2940,7 +2941,7 @@ def score(
)


class ForecastModel:
class ForecastModel(BaseModel):
"""Forecasting models."""

@crash
Expand Down Expand Up @@ -3049,20 +3050,54 @@ def _prediction(
called.
"""
if y is not None or X is not None:

def get_transform_X_y(
X: XSelector | None,
y: YSelector | None,
) -> tuple[pd.DataFrame, Pandas | None]:
"""Get X and y from the pipeline transformation.
Parameters
----------
X: dataframe-like or None
Feature set.
y: int, str, sequence, dataframe-like or None
Target column(s) corresponding to `X`.
Returns
-------
dataframe
Transformed feature set.
series, dataframe or None
Transformed target column.
"""
Xt, yt = self._check_input(X, y, columns=self.og.features, name=self.og.target)

with adjust(self.pipeline, verbose=verbose) as pl:
out = pl.transform(Xt, yt)

if isinstance(out, tuple):
Xt, yt = out
return out
elif X is not None:
Xt, yt = out, yt
return out, yt
else:
Xt, yt = Xt, out
return Xt, out

if y is not None:
try:
Xt, yt = self.branch._get_rows(y, return_X_y=True) # type: ignore[call-overload]

if self.scaler and not Xt.empty:
Xt = cast(pd.DataFrame, self.scaler.transform(Xt))

except Exception: # noqa: BLE001
Xt, yt = get_transform_X_y(X, y)

else:
Xt, yt = X, y
Xt, yt = get_transform_X_y(X, y)

if method != "score":
if "y" in sign(func := getattr(self.estimator, method)):
Expand Down Expand Up @@ -3186,17 +3221,24 @@ def predict_interval(
)

if inverse:
new_interval = pd.DataFrame(index=pred.index, columns=pred.columns)

# We pass every level of the multiindex to inverse_transform...
dfs = []
for level in pred.columns.levels[2]: # type: ignore[union-attr]
df = pred.loc[:, pred.columns.get_level_values(2) == level]
df.columns = df.columns.droplevel(level=(1, 2))
dfs.append(self.inverse_transform(y=df))

# ... and merge every level back to the original output
new_data = merge(*dfs)
new_data.columns = pred.columns
return self._convert(new_data)
for cover in pred.columns.levels[1]: # type: ignore[union-attr]
for level in pred.columns.levels[2]: # type: ignore[union-attr]
# Select only the lower or upper level columns
curr_cover = pred.columns.get_level_values(1)
curr_level = pred.columns.get_level_values(2)
df = pred.loc[:, (curr_cover == cover) & (curr_level == level)]

# Use original column names
df.columns = df.columns.droplevel(level=(1, 2))

# Apply inverse transformation
for name, column in self.inverse_transform(y=df).items():
new_interval.loc[:, (name, cover, level)] = column

return self._convert(new_interval)
else:
return self._convert(pred)

Expand Down Expand Up @@ -3318,7 +3360,7 @@ def predict_residuals(
Parameters
----------
y: int, str, sequence or dataframe
y: int, str, sequence or dataframe-like
Ground truth observations.
X: hashable, segment, sequence, dataframe-like or None, default=None
Expand Down
6 changes: 3 additions & 3 deletions atom/baserunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from atom.basetracker import BaseTracker
from atom.basetransformer import BaseTransformer
from atom.data import Branch
from atom.models import MODELS, Stacking, Voting
from atom.models import MODELS, create_stacking_model, create_voting_model
from atom.pipeline import Pipeline
from atom.utils.constants import DF_ATTRS
from atom.utils.types import (
Expand Down Expand Up @@ -1500,7 +1500,7 @@ def stacking(

kwargs[regressor] = model._get_est({})

self._models.append(Stacking(models=models_c, name=name, **kw_model))
self._models.append(create_stacking_model(models=models_c, name=name, **kw_model))
self[name]._est_params = kwargs if self.task.is_forecast else {"cv": "prefit"} | kwargs

if train_on_test:
Expand Down Expand Up @@ -1568,7 +1568,7 @@ def voting(
)

self._models.append(
Voting(
create_voting_model(
models=models_c,
name=name,
goal=self._goal,
Expand Down
4 changes: 2 additions & 2 deletions atom/basetrainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from atom.baserunner import BaseRunner
from atom.data import BranchManager
from atom.data_cleaning import BaseTransformer
from atom.models import MODELS, CustomModel
from atom.models import MODELS, create_custom_model
from atom.plots import RunnerPlot
from atom.utils.types import Model, Verbose, sequence_t
from atom.utils.utils import (
Expand Down Expand Up @@ -218,7 +218,7 @@ def _prepare_parameters(self):
inc.append(model)

else: # Model is a custom estimator
inc.append(CustomModel(estimator=model, **kwargs))
inc.append(create_custom_model(estimator=model, **kwargs))

if inc and exc:
raise ValueError(
Expand Down
1 change: 1 addition & 0 deletions atom/basetransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,7 @@ def warnings(self, value: Bool | Warnings):
warnings.filterwarnings("ignore", category=FutureWarning, module=".*imblearn.*")
warnings.filterwarnings("ignore", category=UserWarning, module=".*sktime.*")
warnings.filterwarnings("ignore", category=DeprecationWarning, module=".*shap.*")
warnings.filterwarnings("ignore", category=ResourceWarning, module=".*ray.*")
os.environ["PYTHONWARNINGS"] = self._warnings # Affects subprocesses (joblib)

@property
Expand Down
15 changes: 10 additions & 5 deletions atom/data/branch.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,16 +167,21 @@ def name(self) -> str:

@name.setter
def name(self, value: str):
from atom.models import MODELS_ENSEMBLES # Avoid circular import
from atom.models import MODELS

if not value:
raise ValueError("A branch can't have an empty name!")
raise ValueError("A branch can't have an empty name.")
elif value.lower().startswith(("stack", "vote")):
raise ValueError(
"Invalid name for the branch. The name of a "
"branch can't begin with 'stack' or 'vote'."
)
else:
for model in MODELS_ENSEMBLES:
for model in MODELS:
if re.match(model.acronym, value, re.I):
raise ValueError(
"Invalid name for the branch. The name of a branch can "
f"not begin with a model's acronym, and {model.acronym} "
"Invalid name for the branch. The name of a branch can't "
f"begin with a model's acronym, and {model.acronym} "
f"is the acronym of the {model.__name__} model."
)

Expand Down
10 changes: 2 additions & 8 deletions atom/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
QuadraticDiscriminantAnalysis, RadiusNearestNeighbors, RandomForest, Ridge,
StochasticGradientDescent, SupportVectorMachine, XGBoost,
)
from atom.models.custom import CustomModel
from atom.models.ensembles import Stacking, Voting
from atom.models.custom import create_custom_model
from atom.models.ensembles import create_stacking_model, create_voting_model
from atom.models.ts import (
ARIMA, BATS, ETS, MSTL, SARIMAX, STL, TBATS, VAR, VARMAX, AutoARIMA,
AutoETS, Croston, DynamicFactor, ExponentialSmoothing, NaiveForecaster,
Expand Down Expand Up @@ -87,9 +87,3 @@
XGBoost,
key="acronym",
)

# Available ensembles
ENSEMBLES = ClassMap(Stacking, Voting, key="acronym")

# Available models + ensembles
MODELS_ENSEMBLES = ClassMap(*MODELS, *ENSEMBLES, key="acronym")
Loading

0 comments on commit 74a1b8f

Please sign in to comment.