From c93e3011f343ec4181bc1585e701b28ca5e4c27c Mon Sep 17 00:00:00 2001
From: Lionel Massoulard <lionel.massoulard@gmail.com>
Date: Sat, 14 Sep 2019 18:47:19 +0200
Subject: [PATCH 1/3] initialize seed

---
 aikit/ml_machine/jobs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/aikit/ml_machine/jobs.py b/aikit/ml_machine/jobs.py
index 2e0ea0e..2559e07 100644
--- a/aikit/ml_machine/jobs.py
+++ b/aikit/ml_machine/jobs.py
@@ -63,6 +63,7 @@ def __init__(
         self.done_queue_sleeping_time = done_queue_sleeping_time
 
         self.seed = seed
+        self.random_state = seed
 
     @property
     def random_state(self):

From c2a63c443c117247429115acb8ef65fa0a865b18 Mon Sep 17 00:00:00 2001
From: Lionel Massoulard <lionel.massoulard@gmail.com>
Date: Sat, 14 Sep 2019 18:48:03 +0200
Subject: [PATCH 2/3] make 'guess_type_of_variable' work with boolean Series

---
 aikit/tools/db_informations.py        |  9 ++++++++-
 tests/tools/test_db_informations.py   | 11 ++++++++++-
 tests/transformers/test_categories.py | 20 ++++++++++++++++++++
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/aikit/tools/db_informations.py b/aikit/tools/db_informations.py
index 314c18b..86d01d7 100644
--- a/aikit/tools/db_informations.py
+++ b/aikit/tools/db_informations.py
@@ -83,12 +83,19 @@ def guess_type_of_variable(s):
     elif "object" in st:
         nb_u = s.nunique()  # number of different values
         nb = len(s)  # number of items
-        avg_l = s.str.len().mean()
+        
+        if hasattr(s,"str"): #For boolean 
+            avg_l = s.str.len().mean()
+        else:
+            avg_l = 0
 
         if avg_l >= 50 or nb_u >= 0.5 * nb:
             return TypeOfVariables.TEXT
 
         return TypeOfVariables.CAT
+    
+    elif "bool" in st:
+        return TypeOfVariables.CAT
 
     else:
         raise NotImplementedError("I don't know that type of Series : %s, please check" % st)
diff --git a/tests/tools/test_db_informations.py b/tests/tools/test_db_informations.py
index 3d0493e..a01f439 100644
--- a/tests/tools/test_db_informations.py
+++ b/tests/tools/test_db_informations.py
@@ -8,7 +8,7 @@
 import pandas as pd
 import numpy as np
 
-from aikit.tools.db_informations import has_missing_values
+from aikit.tools.db_informations import has_missing_values, guess_type_of_variable, TypeOfVariables
 
 
 def test_has_missing_values():
@@ -27,3 +27,12 @@ def test_has_missing_values():
 
 def verif_all():
     test_has_missing_values()
+
+
+def test_guess_type_of_variable_boolean():
+    # Booleans mixed with None entries must be guessed as categorical.
+    s = pd.Series([True,False,True,None]*10)
+    assert guess_type_of_variable(s) == TypeOfVariables.CAT
+    # Same expectation for booleans without any None entry.
+    s = pd.Series([True,False,True]*10)
+    assert guess_type_of_variable(s) == TypeOfVariables.CAT
diff --git a/tests/transformers/test_categories.py b/tests/transformers/test_categories.py
index 31d9ba7..578465b 100644
--- a/tests/transformers/test_categories.py
+++ b/tests/transformers/test_categories.py
@@ -243,6 +243,26 @@ def test_NumericalEncoder_num_fit_parameters():
     assert len(encoder.model.variable_modality_mapping['cat_col_1']) == 4
     assert len(encoder.model.variable_modality_mapping['cat_col_2']) == 4
     assert len(encoder.model.variable_modality_mapping['cat_col_3']) == 4
+    
+    assert res["cat_col_1"].nunique() == 4
+    assert res["cat_col_2"].nunique() == 4
+    assert res["cat_col_3"].nunique() == 4
+
+
+def test_NumericalEncoder_with_boolean():
+    # Fit a NumericalEncoder built with default arguments on a single boolean column "c".
+    dfX = pd.DataFrame({"c":[True,False]*200})
+    enc = NumericalEncoder()
+    dfX_encoded = enc.fit_transform(dfX)
+    # One output column per boolean value, equal to 1 exactly where "c" has that value.
+    assert "c__True" in dfX_encoded.columns
+    assert "c__False" in dfX_encoded.columns
+    assert ((dfX_encoded["c__True"] == 1) == (dfX["c"])).all()
+    assert ((dfX_encoded["c__False"] == 1) == (~dfX["c"])).all()
+    # Both encoded columns are expected to be int32.
+    assert dfX_encoded["c__True"].dtype == np.int32
+    assert dfX_encoded["c__False"].dtype == np.int32
+
 
 @pytest.mark.xfail()
 def test_bug_CategoryEncoder():

From 21bee76e727647e789caf645e28d6239b355146f Mon Sep 17 00:00:00 2001
From: Lionel Massoulard <lionel.massoulard@gmail.com>
Date: Sat, 14 Sep 2019 18:49:12 +0200
Subject: [PATCH 3/3] more tests on RandomModelGenerator

 * specific hyper-parameters (specific_hyper)
 * filtered models (filter_models)
---
 tests/ml_machine/test_ml_machine.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tests/ml_machine/test_ml_machine.py b/tests/ml_machine/test_ml_machine.py
index 1536850..e1ff1c4 100644
--- a/tests/ml_machine/test_ml_machine.py
+++ b/tests/ml_machine/test_ml_machine.py
@@ -296,10 +296,16 @@ def _all_same(all_gen):
 
     return True
 
-
-def test_RandomModelGenerator_random():
+@pytest.mark.parametrize("specific_hyper, only_random_forest",[(True,True),(True,False),(False,True),(False,False)])
+def test_RandomModelGenerator_random(specific_hyper, only_random_forest):
 
     dfX, y, auto_ml_config = get_automl_config()
+    
+    if specific_hyper:
+        auto_ml_config.specific_hyper = {('Model', 'RandomForestClassifier') : {"n_estimators":[10,20]}}
+        
+    if only_random_forest:
+        auto_ml_config.filter_models(Model='RandomForestClassifier')
 
     random_model_generator = RandomModelGenerator(auto_ml_config=auto_ml_config, random_state=123)
 
@@ -331,6 +337,17 @@ def test_RandomModelGenerator_random():
 
         model = sklearn_model_from_param(result["json_code"])
         assert hasattr(model, "fit")
+        
+        rf_key = ('Model', ('Model', 'RandomForestClassifier'))
+        if only_random_forest:
+            assert rf_key in all_models_params
+            
+        if specific_hyper:
+            if rf_key in all_models_params:
+                assert all_models_params[rf_key]["n_estimators"] in (10,20)
+    
+    if not only_random_forest:
+        assert any([ rf_key not in m[1] for m in all_gen]) # Check that RandomForest wasn't drawn every time
 
     ### re-draw them thing with other seed ###
     random_model_generator = RandomModelGenerator(auto_ml_config=auto_ml_config, random_state=123)