* Fix tests to use the new lowercase column names (#15)

* Fix the random seed in the CdfScaler tests
* Add a CdfScaler test comparing fit_transform with fit + transform
societe-generale · Sep 13, 2019 · 17f8a9a · 17f8a9a
1 parent d6b2555
commit 17f8a9a
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 3 deletions.
diff --git a/tests/transformers/test_all_transformers.py b/tests/transformers/test_all_transformers.py
@@ -1272,7 +1272,7 @@ def test_Word2VecVectorizer(text_preprocess, same_embedding_all_columns, size, w
     df1 = Xtrain.loc[0:600, :]
     df2 = Xtrain.loc[600:, :]
 
-    enc_kwargs = {"columns_to_use": ["Name", "Ticket"]}
+    enc_kwargs = {"columns_to_use": ["name", "ticket"]}
     enc_kwargs["text_preprocess"] = text_preprocess
     enc_kwargs["same_embedding_all_columns"] = same_embedding_all_columns
     enc_kwargs["size"] = size
@@ -1311,7 +1311,7 @@ def test_Char2VecVectorizer(text_preprocess, same_embedding_all_columns, size, w
     df1 = Xtrain.loc[0:600, :]
     df2 = Xtrain.loc[600:, :]
 
-    enc_kwargs = {"columns_to_use": ["Name", "Ticket"]}
+    enc_kwargs = {"columns_to_use": ["name", "ticket"]}
     enc_kwargs["text_preprocess"] = text_preprocess
     enc_kwargs["same_embedding_all_columns"] = same_embedding_all_columns
     enc_kwargs["size"] = size
@@ -2049,7 +2049,7 @@ def test_CdfScaler_with_params(distribution, output_distribution):
         df2=df2_nona.loc[:, variable_by_type["NUM"]],
         y1=y_train_shuffled,
         klass=CdfScaler,
-        enc_kwargs={"distribution": distribution, "output_distribution": output_distribution},
+        enc_kwargs={"distribution": distribution, "output_distribution": output_distribution, "random_state":123},
         all_types=(DataTypes.DataFrame, DataTypes.NumpyArray),
         additional_test_functions=[
             check_all_numerical,

diff --git a/tests/transformers/test_base.py b/tests/transformers/test_base.py
@@ -633,6 +633,20 @@ def test_CdfScaler():
 
     assert scaler._model.distributions == ["normal", "gamma", "beta", "none"]
 
+def test_CdfScaler_fit_vs_fit_transform():
+    np.random.seed(123)
+    X = np.random.randn(2000,10)
+
+    encoder = CdfScaler(distribution="kernel", output_distribution="uniform", random_state=123)
+    X1 = encoder.fit_transform(X)
+
+    encoder_b = CdfScaler(distribution="kernel", output_distribution="uniform", random_state=123)
+    encoder_b.fit(X)
+    X2 = encoder_b.transform(X)
+
+    assert np.abs(X1 - X2).max() <= 10**(-5)
+
+
 
 def test_PassThrough():