Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add Yeo-Johnson transformer #29

Merged
merged 3 commits into from
Sep 8, 2023
Merged

feat: Add Yeo-Johnson transformer #29

merged 3 commits into from
Sep 8, 2023

Conversation

metaboulie
Copy link
Contributor

@transformer
def yeojohnson(brack: tuple = (-2, 2)):
    """Applies the Yeo-Johnson transformation to numeric columns in a panel DataFrame.

    The first two columns of the input are treated as the entity and time
    index columns. One lambda is estimated per entity and per numeric column
    with ``yeojohnson_normmax``; the piecewise Yeo-Johnson formula is then
    applied with that lambda:

    - ``x >= 0, lmbd != 0``: ``((x + 1) ** lmbd - 1) / lmbd``
    - ``x >= 0, lmbd == 0``: ``log(x + 1)``
    - ``x < 0,  lmbd != 2``: ``-((-x + 1) ** (2 - lmbd) - 1) / (2 - lmbd)``
    - ``x < 0,  lmbd == 2``: ``-log(-x + 1)``

    Parameters
    ----------
    brack : 2-tuple, optional
        The starting interval for a downhill bracket search with optimize.brent. Note that this
        is in most cases not critical; the final result is allowed to be outside this bracket.

    Returns
    -------
    transform, invert : tuple of callables
        ``transform`` returns a dict with the transformed frame (``"X_new"``)
        and the fitted lambdas (``"lmbds"``); ``invert`` reads the lambdas
        back from ``state.artifacts["lmbds"]`` and undoes the transformation.
    """

    def transform(X: pl.LazyFrame) -> dict:
        idx_cols = X.columns[:2]
        entity_col, time_col = idx_cols
        gb = X.groupby(X.columns[0])
        # Step 1. Compute optimal lambdas (one per entity and numeric column),
        # stored in columns named "<col>__lmbd".
        lmbds = gb.agg(
            PL_NUMERIC_COLS(entity_col, time_col)
            .apply(lambda x: yeojohnson_normmax(x, brack))
            .cast(pl.Float64())
            .suffix("__lmbd")
        )
        # Step 2. Transform
        cols = X.select(PL_NUMERIC_COLS(entity_col, time_col)).columns
        X_new = X.join(lmbds, on=entity_col, how="left").select(
            idx_cols
            + [
                # x >= 0, lmbd == 0
                pl.when((pl.col(col) >= 0) & (pl.col(f"{col}__lmbd") == 0))
                .then(pl.col(col).log1p())
                # x >= 0, lmbd != 0
                .when(pl.col(col) >= 0)
                .then(((pl.col(col) + 1) ** pl.col(f"{col}__lmbd") - 1)
                      / pl.col(f"{col}__lmbd"))
                # x < 0, lmbd == 2: -log(-x + 1). The argument of log1p must
                # be negated, otherwise log1p(x) is NaN/-inf for x <= -1.
                .when((pl.col(col) < 0) & (pl.col(f"{col}__lmbd") == 2))
                .then(-((-pl.col(col)).log1p()))
                # x < 0, lmbd != 2
                .otherwise(-((-pl.col(col) + 1) ** (2 - pl.col(f"{col}__lmbd")) - 1)
                           / (2 - pl.col(f"{col}__lmbd")))
                for col in cols
            ]
        )
        artifacts = {"X_new": X_new, "lmbds": lmbds}
        return artifacts

    def invert(state: ModelState, X: pl.LazyFrame) -> pl.LazyFrame:
        entity_col, time_col = X.columns[:2]
        lmbds = state.artifacts["lmbds"]
        cols = X.select(PL_NUMERIC_COLS(entity_col, time_col)).columns
        X_new = (
            X.join(lmbds, on=entity_col, how="left", suffix="__lmbd")
            .with_columns(
                [
                    # y >= 0, lmbd == 0: x = exp(y) - 1
                    pl.when((pl.col(col) >= 0) & (pl.col(f"{col}__lmbd") == 0))
                    .then((pl.col(col).exp()) - 1)
                    # y >= 0, lmbd != 0: x = (y * lmbd + 1) ** (1 / lmbd) - 1
                    .when(pl.col(col) >= 0)
                    .then((pl.col(col) * pl.col(f"{col}__lmbd") + 1) ** (1 / pl.col(f"{col}__lmbd")) - 1)
                    # y < 0, lmbd == 2: x = 1 - exp(-y). Negate the column
                    # *before* exponentiating; `-(col).exp()` would negate the
                    # result instead and yield 1 + exp(y).
                    .when((pl.col(col) < 0) & (pl.col(f"{col}__lmbd") == 2))
                    .then(1 - (-pl.col(col)).exp())
                    # y < 0, lmbd != 2:
                    # x = 1 - (-(2 - lmbd) * y + 1) ** (1 / (2 - lmbd))
                    .otherwise(
                        (1 - (-(2 - pl.col(f"{col}__lmbd")) * pl.col(col) + 1) **
                         (1 / (2 - pl.col(f"{col}__lmbd")))
                         )
                    )
                    for col in cols
                ]
            )
            # Drop the joined lambda columns; keep the caller's original schema.
            .select(X.columns)
        )
        return X_new

    return transform, invert

@vercel
Copy link

vercel bot commented Sep 4, 2023

The latest updates on your projects. Learn more about Vercel for Git ↗︎

Name Status Preview Comments Updated (UTC)
functime-docs ✅ Ready (Inspect) Visit Preview 💬 Add feedback Sep 6, 2023 9:17am

@topher-lo
Copy link
Contributor

topher-lo commented Sep 4, 2023

Hi @metaboulie, thanks for making a contribution! We recently created a discord channel for functime: https://discord.gg/jqDyuFa3. Will be great to meet you there 🤝

@topher-lo topher-lo added the enhancement New feature or request label Sep 4, 2023
@topher-lo topher-lo changed the title Add Yeo-Johnson Transformation to Preprocessing and update relevant t… feat: Add Yeo-Johnson transformer Sep 4, 2023
@metaboulie
Copy link
Contributor Author

Hi @metaboulie, thanks for making a contribution! We recently created a discord channel for functime: https://discord.gg/jqDyuFa3. Will be great to meet you there 🤝

I'd like to join the channel, but it appears that this invitation is invalid. Could you please offer me another link or something? Thanks a lot 😶‍🌫️

@topher-lo
Copy link
Contributor

@metaboulie https://discord.gg/J486Jezh here's a new link!

@topher-lo
Copy link
Contributor

If it's still not working, here's my email: chris@neocortexdb.com. Just send over your email or Discord username and I can send an invite directly!

Copy link
Contributor

@topher-lo topher-lo left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. @metaboulie congrats on your first PR here! 🎊

@topher-lo topher-lo merged commit 8b03f68 into functime-org:main Sep 8, 2023
topher-lo pushed a commit that referenced this pull request Dec 19, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants