Skip to content

Commit

Permalink
* fix test new columns name (#15)
Browse files Browse the repository at this point in the history
* fix seed
* new test CdfScaler
  • Loading branch information
Lionel MASSOULARD authored and gfournier committed Sep 13, 2019
1 parent d6b2555 commit 17f8a9a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
6 changes: 3 additions & 3 deletions tests/transformers/test_all_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,7 +1272,7 @@ def test_Word2VecVectorizer(text_preprocess, same_embedding_all_columns, size, w
df1 = Xtrain.loc[0:600, :]
df2 = Xtrain.loc[600:, :]

enc_kwargs = {"columns_to_use": ["Name", "Ticket"]}
enc_kwargs = {"columns_to_use": ["name", "ticket"]}
enc_kwargs["text_preprocess"] = text_preprocess
enc_kwargs["same_embedding_all_columns"] = same_embedding_all_columns
enc_kwargs["size"] = size
Expand Down Expand Up @@ -1311,7 +1311,7 @@ def test_Char2VecVectorizer(text_preprocess, same_embedding_all_columns, size, w
df1 = Xtrain.loc[0:600, :]
df2 = Xtrain.loc[600:, :]

enc_kwargs = {"columns_to_use": ["Name", "Ticket"]}
enc_kwargs = {"columns_to_use": ["name", "ticket"]}
enc_kwargs["text_preprocess"] = text_preprocess
enc_kwargs["same_embedding_all_columns"] = same_embedding_all_columns
enc_kwargs["size"] = size
Expand Down Expand Up @@ -2049,7 +2049,7 @@ def test_CdfScaler_with_params(distribution, output_distribution):
df2=df2_nona.loc[:, variable_by_type["NUM"]],
y1=y_train_shuffled,
klass=CdfScaler,
enc_kwargs={"distribution": distribution, "output_distribution": output_distribution},
enc_kwargs={"distribution": distribution, "output_distribution": output_distribution, "random_state":123},
all_types=(DataTypes.DataFrame, DataTypes.NumpyArray),
additional_test_functions=[
check_all_numerical,
Expand Down
14 changes: 14 additions & 0 deletions tests/transformers/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,20 @@ def test_CdfScaler():

assert scaler._model.distributions == ["normal", "gamma", "beta", "none"]

def test_CdfScaler_fit_vs_fit_transform():
    """Check that ``fit_transform`` matches ``fit`` followed by ``transform``.

    Two ``CdfScaler`` instances built with the same arguments are applied to
    the same seeded 2000 x 10 random normal matrix: one through the one-step
    ``fit_transform`` call, the other through separate ``fit`` and
    ``transform`` calls. The largest absolute element-wise difference between
    the two outputs must not exceed ``10**(-5)``.
    """

    def build_scaler():
        # Both code paths must use an identically configured scaler.
        return CdfScaler(distribution="kernel", output_distribution="uniform", random_state=123)

    # Seed the global NumPy generator so the input matrix is reproducible.
    np.random.seed(123)
    data = np.random.randn(2000, 10)

    # Path 1: fit and transform in a single call.
    one_step_scaler = build_scaler()
    one_step_result = one_step_scaler.fit_transform(data)

    # Path 2: fit first, then transform the same data.
    two_step_scaler = build_scaler()
    two_step_scaler.fit(data)
    two_step_result = two_step_scaler.transform(data)

    tolerance = 10 ** (-5)
    largest_gap = np.abs(one_step_result - two_step_result).max()
    assert largest_gap <= tolerance



def test_PassThrough():

Expand Down

0 comments on commit 17f8a9a

Please sign in to comment.