From 1e975d94bf09c596acf58380d882b46dc839242b Mon Sep 17 00:00:00 2001
From: hiepnguyen034 <hiep.nguyen@quantumblack.com>
Date: Tue, 4 May 2021 17:23:46 +0700
Subject: [PATCH 1/4] Add advanced discretisation strategies (#149)

---
 RELEASE.md                                    |   5 +-
 causalnex/discretiser/__init__.py             |  10 +-
 causalnex/discretiser/abstract_discretiser.py | 114 +++++
 causalnex/discretiser/discretiser_strategy.py | 300 +++++++++++++
 causalnex/utils/__init__.py                   |   0
 causalnex/utils/decision_tree_tools.py        |  63 +++
 setup.py                                      |   1 +
 test_requirements.txt                         |   1 +
 tests/discretiser/conftest.py                 |  99 +++++
 tests/discretiser/test_base.py                |  48 ++
 tests/discretiser/test_decision_tree.py       | 418 ++++++++++++++++++
 tests/discretiser/test_mdlp.py                | 108 +++++
 12 files changed, 1164 insertions(+), 3 deletions(-)
 create mode 100644 causalnex/discretiser/abstract_discretiser.py
 create mode 100644 causalnex/discretiser/discretiser_strategy.py
 create mode 100644 causalnex/utils/__init__.py
 create mode 100644 causalnex/utils/decision_tree_tools.py
 create mode 100644 tests/discretiser/conftest.py
 create mode 100644 tests/discretiser/test_base.py
 create mode 100644 tests/discretiser/test_decision_tree.py
 create mode 100644 tests/discretiser/test_mdlp.py

diff --git a/RELEASE.md b/RELEASE.md
index b9f0969..9e8e5f3 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,10 +1,11 @@
 # Upcoming release
 
 # Release 0.10.0
-
 * Add utility function to extract Markov blanket from a Bayesian Network
 * Support receiving a list of inputs for `InferenceEngine` with a multiprocessing option
-* Fixes cyclical import of `causalnex.plots`, as per #106.
+* Fixes cyclical import of `causalnex.plots`, as per #106
+* Add supervised discretisation strategies using Decision Tree and MDLP algorithms
+
 
 # Release 0.9.2
 * Remove Boston housing dataset from "sklearn tutorial", see #91 for more information.
diff --git a/causalnex/discretiser/__init__.py b/causalnex/discretiser/__init__.py
index 2dd643b..c4b29bc 100644
--- a/causalnex/discretiser/__init__.py
+++ b/causalnex/discretiser/__init__.py
@@ -30,6 +30,14 @@
 ``causalnex.discretiser`` provides functionality to discretise data.
 """
 
-__all__ = ["Discretiser"]
+__all__ = [
+    "Discretiser",
+    "DecisionTreeSupervisedDiscretiserMethod",
+    "MDLPSupervisedDiscretiserMethod",
+]
 
 from .discretiser import Discretiser
+from .discretiser_strategy import (
+    DecisionTreeSupervisedDiscretiserMethod,
+    MDLPSupervisedDiscretiserMethod,
+)
diff --git a/causalnex/discretiser/abstract_discretiser.py b/causalnex/discretiser/abstract_discretiser.py
new file mode 100644
index 0000000..031000a
--- /dev/null
+++ b/causalnex/discretiser/abstract_discretiser.py
@@ -0,0 +1,114 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tools to help discretise data."""
+
+import logging
+from abc import ABC, abstractmethod
+from typing import List
+
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator
+
+
+class AbstractSupervisedDiscretiserMethod(BaseEstimator, ABC):
+    """
+    Base class for advanced discretisation methods
+
+    """
+
+    def __init__(self):
+        self.map_thresholds = {}
+        self.feat_names = None
+
+    @abstractmethod
+    def fit(
+        self,
+        feat_names: List[str],
+        target: str,
+        dataframe: pd.DataFrame,
+        target_continuous: bool,
+    ):
+        """
+        Discretise the features in `feat_names` in such a way that maximises the prediction of `target`.
+
+        Args:
+            feat_names (List[str]): List of feature names to be discretised.
+            target (str): Name of the target variable - the node that adjusts how `feat_names` will be discretised
+            dataframe: The full dataset prior to discretisation.
+            target_continuous (bool): Boolean indicating whether the target variable is continuous
+        Raises:
+            NotImplementedError: AbstractSupervisedDiscretiserMethod should not be called directly
+
+        """
+        raise NotImplementedError("The method is not implemented")
+
+    def _transform_one_column(self, dataframe_one_column: pd.DataFrame) -> np.ndarray:
+        """
+        Given one "original" feature (continuous), discretise it.
+
+        Args:
+            dataframe_one_column: dataframe with a single continuous feature, to be transformed into discrete
+        Returns:
+            Discrete feature, as an np.ndarray of shape (len(df),); raw values if no thresholds were learned
+        """
+        cols = list(dataframe_one_column.columns)
+        if cols[0] in self.map_thresholds:
+            split_points = self.map_thresholds[cols[0]]
+            return np.digitize(dataframe_one_column.values.reshape(-1), split_points)
+
+        if cols[0] not in (self.feat_names or []):  # feat_names is None before fit()
+            logging.warning(
+                "%s is not in feat_names. The column is left unchanged", cols[0]
+            )
+        return dataframe_one_column.values.reshape(-1)
+
+    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
+        """
+        Given one "original" dataframe, discretise it.
+
+        Args:
+            data: dataframe with continuous features, to be transformed into discrete
+        Returns:
+            discretised version of the input data, as a dataframe with the same columns and index as `data`
+        """
+        outputs = {}
+        for col in data.columns:
+            outputs[col] = self._transform_one_column(data[[col]])
+
+        transformed_df = pd.DataFrame(outputs, index=data.index)
+        return transformed_df
+
+    def fit_transform(self, *args, **kwargs):
+        """
+        Raises:
+            NotImplementedError: fit_transform is not implemented
+        """
+        raise NotImplementedError(
+            "fit_transform is not implemented. Please use .fit() and .transform() separately"
+        )
diff --git a/causalnex/discretiser/discretiser_strategy.py b/causalnex/discretiser/discretiser_strategy.py
new file mode 100644
index 0000000..8431ec1
--- /dev/null
+++ b/causalnex/discretiser/discretiser_strategy.py
@@ -0,0 +1,300 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tools to help discretise data."""
+
+import logging
+from copy import deepcopy
+from typing import Any, Dict, List
+
+import pandas as pd
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+
+from causalnex.discretiser.abstract_discretiser import (
+    AbstractSupervisedDiscretiserMethod,
+)
+from causalnex.utils.decision_tree_tools import extract_thresholds_from_dtree
+
+try:
+    from mdlp.discretization import MDLP
+except ImportError:
+    MDLP = None
+    logging.warning("MDLP was not imported successfully")
+
+
+class DecisionTreeSupervisedDiscretiserMethod(AbstractSupervisedDiscretiserMethod):
+    """Allows the discretisation of continuous features based on the split thresholds of either
+    sklearn's DecisionTreeRegressor or DecisionTreeClassifier.
+    DecisionTreeSupervisedDiscretiserMethod is inherited from AbstractSupervisedDiscretiserMethod.
+    When instantiated, we have an object with .fit method to learn discretisation thresholds from data
+    and .transform method to process the input.
+
+
+    Example:
+    ::
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> from causalnex.discretiser.discretiser_strategy import DecisionTreeSupervisedDiscretiserMethod
+        >>> from sklearn.datasets import load_iris
+        >>> iris = load_iris()
+        >>> X, y = iris["data"], iris["target"]
+        >>> names = iris["feature_names"]
+        >>> data = pd.DataFrame(X, columns=names)
+        >>> data["target"] = y
+        >>> dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+        ...     mode="multi", tree_params={"max_depth": 3, "random_state": 2020}
+        ... )
+        >>> tree_discretiser = dt_multi.fit(
+        ...     feat_names=[
+        ...         "sepal length (cm)",
+        ...         "sepal width (cm)",
+        ...         "petal length (cm)",
+        ...         "petal width (cm)",
+        ...     ],
+        ...     dataframe=data,
+        ...     target="target",
+        ...     target_continuous=False,
+        ... )
+        >>> discretised_data = tree_discretiser.transform(data[["petal width (cm)"]])
+        >>> discretised_data.values.ravel()
+        array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+           1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
+           2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2,
+           2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
+
+    """
+
+    def __init__(
+        self,
+        mode: str = "single",
+        split_unselected_feat: bool = False,
+        tree_params: Dict[str, Any] = None,
+    ):
+        """
+        This Discretiser Method uses Decision Trees to predict the target.
+        The cutting points on the Decision Tree become the chosen discretisation thresholds
+
+        If the target is a continuous variable, we fit a `DecisionTreeRegressor` to discretise the data.
+        Otherwise, we fit a Classifier.
+
+        Args:
+            mode (str): Either 'single' or 'multi'.
+            - if single, Train a univariate decision tree for each continuous variable being discretised.
+                The splitting points of the decision tree become discretiser fixed points
+            - if multi, Train a decision tree over all the variables passed.
+                The splitting points of each variable used in the Decision tree become the thresholds for discretisation
+            split_unselected_feat (bool): only applicable if self.mode = 'multi'.
+            - if True, features not selected by the decision tree will be discretised using 'single' mode
+            with the same tree parameters
+            - if False, features not selected by the decision tree will be left unchanged
+            tree_params: keyword arguments, which are parameters
+            used for `sklearn.tree.DecisionTreeClassifier`/`sklearn.tree.DecisionTreeRegressor`.
+            Defaults to `{"max_depth": 2}` when omitted or empty
+        Raises:
+            KeyError: if `mode` is neither 'single' nor 'multi'
+        """
+
+        super().__init__()
+        tree_params = tree_params or {"max_depth": 2}
+        self.tree_params = tree_params
+        self.feat_names = None
+        self.map_thresholds = {}
+        if mode not in ["single", "multi"]:
+            raise KeyError(
+                f"mode, `{mode}` is not valid, please choose in ['single', 'multi']"
+            )
+        self.mode = mode
+        self.split_unselected_feat = split_unselected_feat
+
+    def fit(
+        self,
+        feat_names: List[str],
+        target: str,
+        dataframe: pd.DataFrame,
+        target_continuous: bool,
+    ) -> "DecisionTreeSupervisedDiscretiserMethod":
+        """
+        The fit method allows DecisionTrees to learn split thresholds from the input data
+
+        Args:
+            feat_names (List[str]): the list of features to be discretised
+            target (str): name of the variable that is going to be used as the target for the decision tree
+            dataframe (pd.DataFrame): pandas dataframe of input data
+            target_continuous (bool): a boolean that indicates if the target variable is continuous
+
+        Returns:
+            self: DecisionTreeSupervisedDiscretiserMethod object with learned split thresholds from the decision tree
+        """
+        dtree = (
+            DecisionTreeRegressor(**self.tree_params)
+            if target_continuous
+            else DecisionTreeClassifier(**self.tree_params)
+        )
+        self.feat_names = feat_names
+        self.map_thresholds = {}
+
+        if self.mode == "single":
+            for feat in feat_names:
+                dtree = deepcopy(dtree)
+                # univariate tree: its split points become this feature's thresholds
+                dtree.fit(dataframe[[feat]], dataframe[[target]])
+                thresholds = extract_thresholds_from_dtree(dtree, 1)[0]
+                self.map_thresholds[feat] = thresholds
+
+        elif self.mode == "multi":
+            dtree = deepcopy(dtree)
+            dtree.fit(dataframe[feat_names], dataframe[[target]])
+            threshold_list = extract_thresholds_from_dtree(dtree, len(feat_names))
+
+            for feat, threshold in zip(feat_names, threshold_list):
+                self.map_thresholds[feat] = threshold
+
+            if self.split_unselected_feat:
+                for feat in self.map_thresholds:
+                    if self.map_thresholds[feat].size == 0:
+                        dtree = deepcopy(dtree)
+                        dtree.fit(dataframe[[feat]], dataframe[[target]])
+                        thresholds = extract_thresholds_from_dtree(dtree, 1)[0]
+                        self.map_thresholds[feat] = thresholds
+
+            else:
+                no_use = []
+                for feat in list(self.map_thresholds.keys()):
+                    if self.map_thresholds[feat].size == 0:
+                        no_use.append(feat)
+                        del self.map_thresholds[feat]
+                if len(no_use) > 0:
+                    logging.warning(
+                        "%s not selected by the decision tree. No discretisation thresholds were learned. "
+                        "Consider setting split_unselected_feat = True or discretise them using single mode",
+                        no_use,
+                    )
+
+        return self
+
+
+class MDLPSupervisedDiscretiserMethod(AbstractSupervisedDiscretiserMethod):
+    """Allows discretisation of continuous features using the MDLP algorithm
+
+    Example:
+    ::
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> from causalnex.discretiser.discretiser_strategy import MDLPSupervisedDiscretiserMethod
+        >>> from sklearn.datasets import load_iris
+        >>> iris = load_iris()
+        >>> X, y = iris["data"], iris["target"]
+        >>> names = iris["feature_names"]
+        >>> data = pd.DataFrame(X, columns=names)
+        >>> data["target"] = y
+        >>> discretiser = MDLPSupervisedDiscretiserMethod(
+        ...     {"min_depth": 0, "random_state": 2020, "min_split": 1e-3, "dtype": int}
+        ... )
+        >>> discretiser.fit(
+        ...     feat_names=["sepal length (cm)"],
+        ...     dataframe=data,
+        ...     target="target",
+        ...     target_continuous=False,
+        ... )
+        >>> discretised_data = discretiser.transform(data[["sepal length (cm)"]])
+        >>> discretised_data.values.ravel()
+        array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 2, 2, 2, 1, 2,
+               1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 0, 2, 2, 2,
+               1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2, 2, 0, 2, 2, 2,
+               2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+               2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2])
+
+    """
+
+    def __init__(
+        self,
+        mdlp_args: Dict[str, Any] = None,
+    ):
+        """
+        This method of discretisation applies MDLP to discretise the data
+
+        Args:
+            mdlp_args: keyword arguments used for `mdlp.discretization.MDLP`, e.g. `min_depth` (the minimum
+                depth of the interval splitting), `min_split` (the minimum size to split a bin) and `dtype`
+                (the type of the array returned by MDLP's own `transform()` method).
+                Defaults to `{"min_depth": 0, "min_split": 1e-3, "dtype": int}` when omitted or empty
+        Raises:
+            ImportError: if mdlp-discretization is not installed successfully
+        """
+        super().__init__()
+        mdlp_args = mdlp_args or {"min_depth": 0, "min_split": 1e-3, "dtype": int}
+        self.mdlp_args = mdlp_args
+        self.feat_names = None
+        self.map_feat_transformer = {}
+        if MDLP is None:
+            raise ImportError(
+                "mdlp-discretisation was not installed and imported successfully"
+            )
+        self.mdlp = MDLP(**mdlp_args)
+
+    def fit(
+        self,
+        feat_names: List[str],
+        target: str,
+        dataframe: pd.DataFrame,
+        target_continuous: bool,
+    ) -> "MDLPSupervisedDiscretiserMethod":
+        """
+        The fit method allows MDLP to learn split thresholds from the input data.
+        The target feature cannot be continuous. Thresholds from any earlier fit are discarded
+
+        Args:
+            feat_names (List[str]): the list of features to be discretised
+            target (str): name of the variable that is going to be used as the target for MDLP
+            dataframe (pd.DataFrame): pandas dataframe of input data
+            target_continuous (bool): boolean that indicates if target variable is continuous.
+
+        Returns:
+            self: MDLPSupervisedDiscretiserMethod object with learned split thresholds from mdlp algorithm
+
+        Raises:
+            ValueError: if the target is continuous
+        """
+        if target_continuous:  # validate before touching any fitted state
+            raise ValueError(
+                "Target variable should not be continuous when using MDLP."
+            )
+
+        self.feat_names = feat_names
+        self.map_feat_transformer = {}
+        self.map_thresholds = {}  # drop thresholds learned by an earlier fit
+        for feat in feat_names:
+            mdlp = deepcopy(self.mdlp)
+            mdlp.fit(dataframe[[feat]], dataframe[[target]])
+            self.map_thresholds[feat] = mdlp.cut_points_[0]
+
+        return self
diff --git a/causalnex/utils/__init__.py b/causalnex/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/causalnex/utils/decision_tree_tools.py b/causalnex/utils/decision_tree_tools.py
new file mode 100644
index 0000000..afa6146
--- /dev/null
+++ b/causalnex/utils/decision_tree_tools.py
@@ -0,0 +1,63 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Helper functions for advanced discretisations"""
+
+from typing import List, Union
+
+import numpy as np
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+
+
+def extract_thresholds_from_dtree(
+    dtree: Union[DecisionTreeClassifier, DecisionTreeRegressor],
+    length_df: int,
+) -> List[np.array]:
+    """Collect the split thresholds a fitted decision tree uses for each feature.
+
+    Args:
+        dtree: A fitted decision tree model object (its `tree_` attribute is read)
+        length_df (int): number of feature columns to report on; features are
+            identified by their positional index `0 .. length_df - 1`
+
+    Returns:
+        a list with one numpy array per feature holding that feature's sorted,
+        de-duplicated thresholds (an empty array if the tree never splits on it)
+    """
+    fitted_tree = dtree.tree_
+    split_values = fitted_tree.threshold
+    split_features = fitted_tree.feature
+
+    def _thresholds_of(index: int) -> np.array:
+        # features the tree never split on get an explicit empty array
+        if index not in split_features:
+            return np.array([])
+        # np.unique both sorts and de-duplicates the matching thresholds
+        chosen = split_features == index
+        return np.unique(split_values[chosen])
+
+    return [_thresholds_of(index) for index in range(length_df)]
diff --git a/setup.py b/setup.py
index d68b47f..e71bd4a 100644
--- a/setup.py
+++ b/setup.py
@@ -58,6 +58,7 @@
     "plot": [
         "pygraphviz>=1.5, <2.0",
     ],
+    "discretiser": ["mdlp-discretization~=0.3.3"],
 }
 
 extras_require["all"] = sorted(chain.from_iterable(extras_require.values()))
diff --git a/test_requirements.txt b/test_requirements.txt
index 5dc729b..cb984c9 100644
--- a/test_requirements.txt
+++ b/test_requirements.txt
@@ -3,6 +3,7 @@ flake8>=3.5, <4.0
 ipython>=7.0, <7.17
 isort>=4.3.16, <5.0
 matplotlib~=3.3
+mdlp-discretization~=0.3.3
 mock>=2.0.0, <3.0
 pre-commit>=1.17.0, <2.0.0
 pygraphviz>=1.5, <2.0
diff --git a/tests/discretiser/conftest.py b/tests/discretiser/conftest.py
new file mode 100644
index 0000000..a0bdaa0
--- /dev/null
+++ b/tests/discretiser/conftest.py
@@ -0,0 +1,99 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List
+
+import pandas as pd
+import pytest
+from sklearn.datasets import load_diabetes, load_iris
+
+from causalnex.discretiser.abstract_discretiser import (
+    AbstractSupervisedDiscretiserMethod,
+)
+
+
+class Dummy(AbstractSupervisedDiscretiserMethod):
+    def fit(
+        self,
+        feat_names: List[str],
+        target: str,
+        dataframe: pd.DataFrame,
+        target_continuous: bool,
+    ):
+        raise NotImplementedError("This is not implemented")
+
+    def learn(self, get_iris_data):
+        super().fit(
+            feat_names=["petal width (cm)"],
+            dataframe=get_iris_data,
+            target_continuous=False,
+            target="target",
+        )
+
+    def learn_transform(self, get_iris_data):
+        super().fit_transform(
+            feat_names=["petal width (cm)"],
+            dataframe=get_iris_data,
+            target_continuous=False,
+            target="target",
+        )
+
+
+@pytest.fixture
+def get_dummy_class():
+    return Dummy()
+
+
+@pytest.fixture
+def get_iris_data():
+    iris = load_iris()
+    X, y = iris["data"], iris["target"]
+    names = iris["feature_names"]
+    df = pd.DataFrame(X, columns=names)
+    df["target"] = y
+    return df
+
+
+@pytest.fixture
+def get_diabete_data():
+    diabetes = load_diabetes()
+    X, y = diabetes["data"], diabetes["target"]
+    names = diabetes["feature_names"]
+    df = pd.DataFrame(X, columns=names)
+    df["target"] = y
+    return df
+
+
+@pytest.fixture
+def categorical_data(get_iris_data):
+    return get_iris_data[["petal width (cm)", "target"]]
+
+
+@pytest.fixture
+def continuous_data(get_diabete_data):
+    return get_diabete_data[["s6", "target"]]
diff --git a/tests/discretiser/test_base.py b/tests/discretiser/test_base.py
new file mode 100644
index 0000000..9f4778d
--- /dev/null
+++ b/tests/discretiser/test_base.py
@@ -0,0 +1,48 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+
+class TestBaseClass:
+    def test_fit_not_implemented(self, get_iris_data, get_dummy_class):
+        obj = get_dummy_class
+        with pytest.raises(NotImplementedError):
+            obj.learn(get_iris_data)
+        with pytest.raises(NotImplementedError):
+            obj.fit(
+                feat_names=["petal width (cm)"],
+                dataframe=get_iris_data,
+                target_continuous=False,
+                target="target",
+            )
+
+    def test_fit_transform_not_implemented(self, get_iris_data, get_dummy_class):
+        obj = get_dummy_class
+        with pytest.raises(NotImplementedError):
+            obj.learn_transform(get_iris_data)
diff --git a/tests/discretiser/test_decision_tree.py b/tests/discretiser/test_decision_tree.py
new file mode 100644
index 0000000..a246e58
--- /dev/null
+++ b/tests/discretiser/test_decision_tree.py
@@ -0,0 +1,418 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import pytest
+
+from causalnex.discretiser.discretiser_strategy import (
+    DecisionTreeSupervisedDiscretiserMethod,
+)
+
+
+class TestDecisionTree:
+    def test_single_continuous(self, continuous_data):
+        diabete = continuous_data.copy(deep=True)
+
+        dt_single = DecisionTreeSupervisedDiscretiserMethod(
+            tree_params={"max_depth": 2},
+            mode="single",
+        )
+        tree_discretiser = dt_single.fit(
+            feat_names=["s6"],
+            dataframe=diabete,
+            target_continuous=True,
+            target="target",
+        )
+        discretiser_output = tree_discretiser.transform(diabete[["s6"]]).values
+
+        ground_truth = np.array(
+            [
+                [1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0],
+                [1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 3, 0, 0],
+                [1, 2, 0, 2, 1, 0, 1, 1, 0, 1, 1, 1, 1],
+                [1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 1, 1, 1],
+                [1, 2, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1],
+                [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1],
+                [0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 1, 1],
+                [1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 1, 1],
+                [0, 1, 0, 1, 2, 2, 1, 0, 1, 2, 1, 1, 1],
+                [3, 2, 1, 1, 1, 2, 2, 1, 1, 0, 0, 0, 2],
+                [2, 0, 1, 1, 0, 2, 1, 1, 2, 1, 1, 3, 1],
+                [0, 1, 0, 1, 2, 1, 0, 1, 1, 2, 1, 1, 2],
+                [0, 1, 0, 1, 0, 2, 1, 2, 1, 1, 0, 2, 3],
+                [1, 1, 0, 2, 0, 1, 1, 1, 1, 1, 1, 1, 0],
+                [1, 2, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1],
+                [1, 1, 2, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2],
+                [2, 1, 0, 1, 1, 1, 0, 2, 2, 2, 1, 1, 0],
+                [0, 1, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 0],
+                [2, 2, 1, 1, 1, 2, 1, 2, 0, 0, 1, 0, 0],
+                [0, 2, 1, 2, 2, 1, 2, 2, 1, 1, 0, 2, 1],
+                [1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 1, 1, 0],
+                [2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1],
+                [1, 1, 1, 0, 2, 0, 0, 1, 0, 1, 0, 0, 1],
+                [1, 2, 2, 1, 1, 1, 0, 1, 2, 0, 2, 1, 1],
+                [1, 1, 0, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1],
+                [2, 1, 1, 1, 2, 1, 1, 2, 1, 0, 0, 2, 1],
+                [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3],
+                [0, 0, 1, 2, 1, 0, 0, 0, 2, 2, 1, 2, 1],
+                [2, 2, 2, 1, 2, 1, 0, 1, 1, 1, 0, 1, 1],
+                [0, 0, 1, 1, 0, 2, 1, 0, 1, 1, 0, 0, 0],
+                [2, 0, 2, 1, 1, 0, 0, 1, 0, 1, 1, 2, 2],
+                [2, 1, 1, 0, 1, 2, 1, 0, 1, 2, 1, 1, 1],
+                [2, 1, 1, 1, 2, 1, 1, 0, 1, 0, 2, 1, 3],
+                [0, 1, 1, 2, 1, 1, 0, 0, 1, 2, 1, 1, 1],
+            ]
+        )  # ground truth generated manually, using DecisionTree to extract thresholds
+
+        assert (ground_truth == discretiser_output.reshape(-1, 13)).all()
+
+    def test_single_categorical(self, categorical_data):
+        df = categorical_data.copy(deep=True)
+        ground_truth = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
+                [2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )  # ground truth generated manually, using DecisionTree to extract thresholds
+
+        dt_single = DecisionTreeSupervisedDiscretiserMethod(
+            tree_params={"max_depth": 2},
+            mode="single",
+        )
+        tree_discretiser = dt_single.fit(
+            feat_names=["petal width (cm)"],
+            dataframe=df,
+            target_continuous=False,
+            target="target",
+        )
+        discretiser_output = tree_discretiser.transform(df[["petal width (cm)"]]).values
+        assert (ground_truth == discretiser_output.reshape(-1, 15)).all()
+
+    def test_invalid_mode(self):
+        with pytest.raises(KeyError):
+            DecisionTreeSupervisedDiscretiserMethod(
+                tree_params={"max_depth": 2}, mode="invalid"
+            )
+
+    def test_transform_no_feature(self, get_iris_data, caplog):
+        ground_truth = get_iris_data[["sepal width (cm)"]]
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+            mode="multi",
+            split_unselected_feat=False,
+            tree_params={"max_depth": 3, "random_state": 2020},
+        )
+        tree_discretiser = dt_multi.fit(
+            feat_names=["sepal length (cm)", "petal length (cm)"],
+            dataframe=get_iris_data,
+            target_continuous=False,
+            target="target",
+        )
+        # sepal width was not in feat_names, so transform() should leave it untouched.
+        output = tree_discretiser.transform(get_iris_data[["sepal width (cm)"]])
+        # Compare values element-wise: all(DataFrame) only iterates the column labels.
+        assert "The column is left unchanged" in caplog.text
+        assert (ground_truth.values == output.values).all()
+
+    def test_keep_unselected_feature(self, get_iris_data):
+        ground_truth = np.array(
+            [
+                [4, 2, 3, 3, 4, 6, 4, 4, 1, 3, 4, 4, 2, 2, 6],
+                [6, 6, 4, 5, 5, 4, 4, 4, 3, 4, 2, 4, 4, 4, 3],
+                [3, 4, 6, 6, 3, 3, 4, 4, 2, 4, 4, 0, 3, 4, 5],
+                [2, 5, 3, 4, 3, 3, 3, 3, 0, 1, 1, 3, 0, 1, 1],
+                [0, 2, 0, 1, 1, 3, 2, 1, 0, 1, 3, 1, 1, 1, 1],
+                [2, 1, 2, 1, 1, 0, 0, 1, 1, 2, 4, 3, 0, 2, 1],
+                [1, 2, 1, 0, 1, 2, 1, 1, 1, 1, 3, 1, 2, 1, 2],
+                [2, 1, 1, 1, 4, 3, 1, 2, 1, 1, 3, 2, 5, 1, 0],
+                [3, 1, 1, 1, 3, 3, 1, 2, 1, 2, 1, 5, 1, 1, 1],
+                [2, 4, 3, 2, 3, 3, 3, 1, 3, 3, 2, 1, 2, 4, 2],
+            ]
+        )  # ground truth generated manually, using DecisionTree to extract thresholds
+
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+            tree_params={"max_depth": 3, "random_state": 2020},
+            mode="multi",
+            split_unselected_feat=True,
+        )
+        tree_discretiser = dt_multi.fit(
+            feat_names=[
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            dataframe=get_iris_data,
+            target_continuous=False,
+            target="target",
+        )
+
+        output = tree_discretiser.transform(get_iris_data[["sepal width (cm)"]]).values
+        assert (ground_truth == output.reshape(-1, 15)).all()
+
+    def test_multi_fit(self, get_iris_data):
+        ground_truth_petal_length = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2],
+                [2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                [2, 1, 2, 1, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2],
+                [2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )  # ground truth generated manually, using DecisionTree to extract thresholds
+
+        ground_truth_petal_width = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
+                [2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )  # ground truth generated manually, using DecisionTree to extract thresholds
+
+        iris = get_iris_data.copy(deep=True)
+
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+            tree_params={"max_depth": 3, "random_state": 2020}, mode="multi"
+        )
+
+        tree_discretiser = dt_multi.fit(
+            feat_names=[
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            dataframe=iris,
+            target_continuous=False,
+            target="target",
+        )
+
+        discretiser_petal_length = tree_discretiser.transform(
+            iris[["petal length (cm)"]]
+        ).values
+        discretiser_petal_width = tree_discretiser.transform(
+            iris[["petal width (cm)"]]
+        ).values
+        assert (
+            ground_truth_petal_length == discretiser_petal_length.reshape(-1, 15)
+        ).all()
+        assert (
+            ground_truth_petal_width == discretiser_petal_width.reshape(-1, 15)
+        ).all()
+
+    def test_no_unselected_feature(self, get_iris_data):
+        ground_truth = get_iris_data[["sepal width (cm)"]]
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+            tree_params={"max_depth": 3, "random_state": 2020},
+            mode="multi",
+            split_unselected_feat=False,
+        )
+        tree_discretiser = dt_multi.fit(
+            feat_names=[
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            dataframe=get_iris_data,
+            target_continuous=False,
+            target="target",
+        )
+        output = tree_discretiser.transform(get_iris_data[["sepal width (cm)"]])
+        # Compare values element-wise: all(DataFrame) only iterates the column labels.
+        assert (ground_truth.values == output.values).all()
+
+    def test_default_args(self):
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod()
+        params = dt_multi.get_params()
+        assert params["tree_params"]["max_depth"] == 2
+
+    def test_transform_all_single(self, get_iris_data):
+        data = get_iris_data.copy(deep=True)
+        sepal_length = np.array(
+            [
+                [1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 2],
+                [2, 1, 1, 2, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0],
+                [0, 1, 1, 2, 1, 1, 2, 1, 0, 1, 1, 0, 0, 1, 1],
+                [0, 1, 0, 1, 1, 3, 3, 3, 2, 3, 2, 3, 1, 3, 1],
+                [1, 2, 2, 2, 2, 3, 2, 2, 3, 2, 2, 2, 3, 2, 3],
+                [3, 3, 3, 2, 2, 2, 2, 2, 2, 1, 2, 3, 3, 2, 2],
+                [2, 2, 2, 1, 2, 2, 2, 3, 1, 2, 3, 2, 3, 3, 3],
+                [3, 1, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 2],
+                [3, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2],
+                [3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 2],
+            ]
+        )
+        sepal_width = np.array(
+            [
+                [2, 1, 1, 1, 2, 3, 2, 2, 0, 1, 2, 2, 1, 1, 3],
+                [3, 3, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1],
+                [1, 2, 3, 3, 1, 1, 2, 2, 1, 2, 2, 0, 1, 2, 2],
+                [1, 2, 1, 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0],
+                [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0],
+                [1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 1, 0],
+                [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1],
+                [1, 0, 0, 0, 2, 1, 0, 1, 0, 0, 1, 1, 2, 0, 0],
+                [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 2, 0, 0, 0],
+                [1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1],
+            ]
+        )
+        petal_length = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1],
+                [1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
+                [2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )
+        petal_width = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
+                [2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )
+
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+            mode="single", tree_params={"max_depth": 2, "random_state": 2020}
+        )
+        tree_discretiser = dt_multi.fit(
+            feat_names=[
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            dataframe=data,
+            target="target",
+            target_continuous=False,
+        )
+        output_df = tree_discretiser.transform(data)
+
+        assert (
+            output_df["sepal length (cm)"].values.reshape(-1, 15) == sepal_length
+        ).all()
+        assert (
+            output_df["sepal width (cm)"].values.reshape(-1, 15) == sepal_width
+        ).all()
+        assert (
+            output_df["petal length (cm)"].values.reshape(-1, 15) == petal_length
+        ).all()
+        assert (
+            output_df["petal width (cm)"].values.reshape(-1, 15) == petal_width
+        ).all()
+
+    def test_transform_all_multi(self, get_iris_data):
+        data = get_iris_data.copy(deep=True)
+        sepal_length = data["sepal length (cm)"]
+        sepal_width = data["sepal width (cm)"]
+        petal_length = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
+                [0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2],
+                [2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                [2, 1, 2, 1, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 2],
+                [2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )
+        petal_width = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2],
+                [2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )
+
+        dt_multi = DecisionTreeSupervisedDiscretiserMethod(
+            mode="multi", tree_params={"max_depth": 3, "random_state": 2020}
+        )
+        tree_discretiser = dt_multi.fit(
+            feat_names=[
+                "sepal length (cm)",
+                "sepal width (cm)",
+                "petal length (cm)",
+                "petal width (cm)",
+            ],
+            dataframe=data,
+            target="target",
+            target_continuous=False,
+        )
+        output_df = tree_discretiser.transform(data)
+
+        assert (output_df["sepal length (cm)"].values == sepal_length).all()
+        assert (output_df["sepal width (cm)"].values == sepal_width).all()
+        assert (
+            output_df["petal length (cm)"].values.reshape(-1, 15) == petal_length
+        ).all()
+        assert (
+            output_df["petal width (cm)"].values.reshape(-1, 15) == petal_width
+        ).all()
diff --git a/tests/discretiser/test_mdlp.py b/tests/discretiser/test_mdlp.py
new file mode 100644
index 0000000..730a452
--- /dev/null
+++ b/tests/discretiser/test_mdlp.py
@@ -0,0 +1,108 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from importlib import reload
+
+import numpy as np
+import pytest
+from mock import patch
+
+from causalnex.discretiser import discretiser_strategy
+from causalnex.discretiser.discretiser_strategy import MDLPSupervisedDiscretiserMethod
+
+
+class TestMDLP:
+    def test_output(self, get_iris_data):
+        ground_truth = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
+                [1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0],
+                [0, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 1, 1, 1, 1, 2, 0, 2, 2, 2, 1, 1],
+                [1, 2, 1, 0, 1, 1, 1, 2, 0, 1, 2, 1, 2, 2, 2],
+                [2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2],
+                [2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )  # ground_truth generated manually, using MDLP to extract thresholds
+
+        discretiser = MDLPSupervisedDiscretiserMethod(
+            {"min_depth": 0, "random_state": 2020, "min_split": 1e-3, "dtype": int}
+        )
+
+        discretiser.fit(
+            feat_names=["sepal length (cm)"],
+            dataframe=get_iris_data,
+            target_continuous=False,
+            target="target",
+        )
+
+        output = discretiser.transform(get_iris_data[["sepal length (cm)"]]).values
+        assert (output.reshape(-1, 15) == ground_truth).all()
+
+    def test_target_continuous(self, get_iris_data):
+
+        discretiser = MDLPSupervisedDiscretiserMethod(
+            {"min_depth": 0, "random_state": 17, "min_split": 1e-3, "dtype": int}
+        )
+
+        with pytest.raises(ValueError):
+            discretiser.fit(
+                feat_names=["sepal length (cm)"],
+                dataframe=get_iris_data,
+                target_continuous=True,
+                target="target",
+            )
+
+    def test_warning_import(self, caplog):
+        with patch.dict("sys.modules", {"mdlp.discretization": None}):
+            reload(discretiser_strategy)
+        reload(discretiser_strategy)
+        assert "MDLP was not imported successfully" in caplog.text
+
+    def test_import_error(self):
+        with patch.dict("sys.modules", {"mdlp.discretization": None}):
+            reload(discretiser_strategy)
+            with pytest.raises(ImportError):
+                discretiser_strategy.MDLPSupervisedDiscretiserMethod(
+                    {
+                        "min_depth": 0,
+                        "random_state": 2020,
+                        "min_split": 1e-3,
+                        "dtype": int,
+                    }
+                )
+        reload(discretiser_strategy)
+
+    def test_default_args(self):
+        discretiser = MDLPSupervisedDiscretiserMethod()
+        params = discretiser.get_params()
+        assert params["mdlp_args"]["min_depth"] == 0
+        assert params["mdlp_args"]["min_split"] == 1e-3
+        assert params["mdlp_args"]["dtype"] == int

From 17f22850d2fe73a219bd165cdacd032b93365fdb Mon Sep 17 00:00:00 2001
From: hiepnguyen034 <hiep.nguyen@quantumblack.com>
Date: Tue, 11 May 2021 21:55:33 +0700
Subject: [PATCH 2/4] Feature/sklearn compatibility (#153)

Co-authored-by: philip_pilgerstorfer <philip.pilgerstorfer!@quantumblack.com>
Co-authored-by: Zain Patel <zain.patel@quantumblack.com>
---
 RELEASE.md                                    |   5 +-
 causalnex/discretiser/discretiser_strategy.py |   1 +
 causalnex/network/sklearn/__init__.py         |  36 +++
 causalnex/network/sklearn/models.py           | 294 +++++++++++++++++
 tests/conftest.py                             |  36 +++
 tests/discretiser/test_decision_tree.py       |   1 -
 tests/test_network_model.py                   | 298 ++++++++++++++++++
 7 files changed, 668 insertions(+), 3 deletions(-)
 create mode 100644 causalnex/network/sklearn/__init__.py
 create mode 100644 causalnex/network/sklearn/models.py
 create mode 100644 tests/test_network_model.py

diff --git a/RELEASE.md b/RELEASE.md
index 9e8e5f3..f900934 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,12 +1,13 @@
 # Upcoming release
 
 # Release 0.10.0
+* Add supervised discretisation strategies using Decision Tree and MDLP algorithms.
+* Add `BayesianNetworkClassifier` an sklearn compatible class for fitting and predicting probabilities in a BN.
+* Fixes cyclical import of `causalnex.plots`, as per #106.
 * Add utility function to extract Markov blanket from a Bayesian Network
 * Support receiving a list of inputs for `InferenceEngine` with a multiprocessing option
-* Fixes cyclical import of `causalnex.plots`, as per #106
 * Add supervised discretisation strategies using Decision Tree and MDLP algorithms
 
-
 # Release 0.9.2
 * Remove Boston housing dataset from "sklearn tutorial", see #91 for more information.
 * Update pylint version to 2.7
diff --git a/causalnex/discretiser/discretiser_strategy.py b/causalnex/discretiser/discretiser_strategy.py
index 8431ec1..7b50c29 100644
--- a/causalnex/discretiser/discretiser_strategy.py
+++ b/causalnex/discretiser/discretiser_strategy.py
@@ -225,6 +225,7 @@ class MDLPSupervisedDiscretiserMethod(AbstractSupervisedDiscretiserMethod):
         >>> )
         >>> discretised_data = discretiser.transform(data[["sepal length (cm)"]])
         >>> discretised_data.values.ravel()
+
         array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0, 0, 2, 2, 2, 1, 2,
diff --git a/causalnex/network/sklearn/__init__.py b/causalnex/network/sklearn/__init__.py
new file mode 100644
index 0000000..b6d8ad2
--- /dev/null
+++ b/causalnex/network/sklearn/__init__.py
@@ -0,0 +1,36 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+``causalnex.network.sklearn`` provides functionality to learn joint probability
+distribution of networks with sklearn compatibility.
+"""
+
+__all__ = ["BayesianNetworkClassifier"]
+
+from .models import BayesianNetworkClassifier
diff --git a/causalnex/network/sklearn/models.py b/causalnex/network/sklearn/models.py
new file mode 100644
index 0000000..3e76408
--- /dev/null
+++ b/causalnex/network/sklearn/models.py
@@ -0,0 +1,294 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+This module contains the implementation of ``BayesianNetworkClassifier``.
+
+``BayesianNetworkClassifier`` is a class that supports learning CPDs from input data
+and making predictions
+"""
+
+import logging
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+from sklearn.base import BaseEstimator, ClassifierMixin
+
+from causalnex.discretiser import Discretiser
+from causalnex.discretiser.discretiser_strategy import (
+    DecisionTreeSupervisedDiscretiserMethod,
+    MDLPSupervisedDiscretiserMethod,
+)
+from causalnex.network import BayesianNetwork
+from causalnex.structure import StructureModel
+
+
+class BayesianNetworkClassifier(BaseEstimator, ClassifierMixin):
+    """
+    A class that supports discretising features and probability fitting with scikit-learn syntax
+
+    Example:
+    ::
+        # Dataset is from https://archive.ics.uci.edu/ml/datasets/student+performance
+        >>> import pandas as pd
+        >>> import numpy as np
+        >>> from sklearn.preprocessing import LabelEncoder
+        >>> from causalnex.discretiser import Discretiser
+        >>> from causalnex.network.sklearn import BayesianNetworkClassifier
+        >>> from sklearn.model_selection import train_test_split
+        >>> data = pd.read_csv('student-por.csv', delimiter=';')
+        >>> drop_col = ['school','sex','age','Mjob', 'Fjob','reason','guardian']
+        >>> data = data.drop(columns=drop_col)
+        >>> non_numeric_columns = list(data.select_dtypes(exclude=[np.number]).columns)
+        >>> le = LabelEncoder()
+        >>> for col in non_numeric_columns:
+        >>>     data[col] = le.fit_transform(data[col])
+        >>> data["G3"] = Discretiser(method="fixed",
+                      numeric_split_points=[10]).transform(data["G3"].values)
+        >>> label = data["G3"]
+        >>> data.drop(['G3'], axis=1, inplace=True)
+        >>> X_train, X_test, y_train, y_test = train_test_split(
+                        data, label, test_size=0.1, random_state=7)
+        >>> edge_list = [('address', 'absences'),
+                         ('Pstatus', 'famrel'),
+                         ('Pstatus', 'absences'),
+                         ('studytime', 'G1'),
+                         ('G1', 'G2'),
+                         ('failures', 'absences'),
+                         ('failures', 'G1'),
+                         ('schoolsup', 'G1'),
+                         ('paid', 'absences'),
+                         ('higher', 'famrel'),
+                         ('higher', 'G1'),
+                         ('internet', 'absences'),
+                         ('G2', 'G3')]
+        >>> discretiser_param = {
+                'absences': {'method':"fixed",
+                             'numeric_split_points':[1, 10]
+                            },
+                 'G1': {'method':"fixed",
+                        'numeric_split_points':[10]
+                       },
+                 'G2': {'method':"fixed",
+                        'numeric_split_points':[10]
+                       }
+                }
+        >>> discretiser_alg = {'absences': 'unsupervised',
+                              'G1': 'unsupervised',
+                              'G2': 'unsupervised'
+                             }
+        >>> bayesian_param = {'method':"BayesianEstimator", 'bayes_prior':"K2"}
+        >>> clf = BayesianNetworkClassifier(edge_list, discretiser_alg, discretiser_param, bayesian_param)
+        >>> clf.fit(X_train, y_train)
+        >>> clf.predict(X_test)
+        array([1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
+               1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1,
+               1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0])
+
+    """
+
+    def __init__(
+        self,
+        list_of_edges: List[Tuple[str, str]],
+        discretiser_alg: Optional[Dict[str, str]] = None,
+        discretiser_kwargs: Optional[Dict[str, Dict[str, Any]]] = None,
+        probability_kwargs: Optional[Dict[str, Any]] = None,
+        return_prob: bool = False,
+    ):
+        """
+        Args:
+            list_of_edges (list): Edge list to construct graph
+            discretiser_alg (dict): Specify the algorithm used to discretise
+            each feature in the data. Available options for the dictionary values
+            are ['unsupervised', 'tree', 'mdlp']
+            - if 'unsupervised': discretise the data using unsupervised method
+            - if 'tree': discretise the data using decision tree method
+            - if 'mdlp': discretise the data using MDLP method
+            discretiser_kwargs (dict): Keyword arguments for discretisation methods.
+            Only applicable if discretiser_alg is not None.
+            probability_kwargs (dict): keyword arguments for the probability model
+            return_prob (bool): choose to return predictions or probability
+            - if True: return pandas dataframe with predicted probability for each state
+            - if False: return a 1-D prediction array
+
+        Raises:
+            KeyError: If an unsupported discretiser algorithm is given
+            ValueError: If the keys in discretiser_alg and discretiser_kwargs differ
+        """
+        # Default to a Bayesian estimator with a K2 prior when no kwargs are given
+        probability_kwargs = probability_kwargs or {
+            "method": "BayesianEstimator",
+            "bayes_prior": "K2",
+        }
+        # Without a discretiser algorithm the features are used exactly as provided
+        if discretiser_alg is None:
+            logging.info(
+                "No discretiser algorithm was given. "
+                "The training data will not be discretised"
+            )
+            discretiser_alg = {}
+
+        discretiser_kwargs = discretiser_kwargs or {}
+
+        self._validate_discretiser(discretiser_alg, discretiser_kwargs)
+
+        self.structure = StructureModel(list_of_edges)
+        self.bn = BayesianNetwork(self.structure)
+        self.return_prob = return_prob
+        self.probability_kwargs = probability_kwargs
+        self.discretiser_kwargs = discretiser_kwargs
+        self.discretiser_alg = discretiser_alg
+        self._target_name = None
+        self._discretise_data = None
+
+    @staticmethod
+    def _validate_discretiser(discretiser_alg, discretiser_kwargs):
+        unavailable_discretiser_algs = {
+            k: v not in ["unsupervised", "tree", "mdlp"]
+            for k, v in discretiser_alg.items()
+        }
+
+        if any(unavailable_discretiser_algs.values()):
+            raise KeyError(
+                "Some discretiser algorithms are not supported: `{:}`. "
+                "Please choose in ['unsupervised', 'tree', 'mdlp']".format(
+                    {
+                        k: discretiser_alg[k]
+                        for k, v in unavailable_discretiser_algs.items()
+                        if v
+                    }
+                )
+            )
+
+        if set(discretiser_kwargs) != set(discretiser_alg):
+            raise ValueError(
+                "discretiser_alg and discretiser_kwargs should have the same keys"
+            )
+
+    def _discretise_features(self, X: pd.DataFrame) -> pd.DataFrame:
+        """
+        Helper method to discretise input data using parameters in
+        `discretiser_kwargs` and `discretiser_alg`.
+        The splitting thresholds are extracted from the training data
+
+        Args:
+            X (pd.DataFrame): a dataframe to be discretised
+
+        Returns:
+            a discretised version of the input dataframe
+        """
+
+        X = X.copy()
+
+        for col in self.discretiser_alg.keys():
+
+            if self.discretiser_alg[col] == "unsupervised":
+
+                if self.discretiser_kwargs[col]["method"] == "fixed":
+                    X[col] = Discretiser(**self.discretiser_kwargs[col]).transform(
+                        X[col].values
+                    )
+                else:
+                    discretiser = Discretiser(**self.discretiser_kwargs[col]).fit(
+                        self._discretise_data[col].values
+                    )
+                    X[col] = discretiser.transform(X[col].values)
+
+            else:
+                if self.discretiser_alg[col] == "tree":
+                    discretiser = DecisionTreeSupervisedDiscretiserMethod(
+                        mode="single", tree_params=self.discretiser_kwargs[col]
+                    )
+
+                elif self.discretiser_alg[col] == "mdlp":
+                    discretiser = MDLPSupervisedDiscretiserMethod(
+                        self.discretiser_kwargs[col]
+                    )
+
+                discretiser.fit(
+                    dataframe=self._discretise_data,
+                    feat_names=[col],
+                    target=self._target_name,
+                    target_continuous=False,
+                )
+
+                X[col] = discretiser.transform(X[[col]])
+
+        return X
+
+    def fit(self, X: pd.DataFrame, y: pd.Series) -> "BayesianNetworkClassifier":
+        """
+        Build a Bayesian Network classifier from a set of training data.
+        The method first discretises the feature using parameters in `discretiser_kwargs`
+        and `discretiser_alg`. Next, it learns all the possible states that each feature
+        can have. Finally, it learns the CPDs of the Bayesian Network.
+
+        Args:
+            X (pd.DataFrame): input training data
+            y (pd.Series): categorical label for each row of X
+
+        Returns:
+            self
+        """
+        self._discretise_data = X.copy()
+        self._discretise_data[y.name] = y
+        self._target_name = y.name
+        X = self._discretise_features(X)
+
+        X[y.name] = y
+        self.bn = self.bn.fit_node_states(X)
+        self.bn = self.bn.fit_cpds(X, **self.probability_kwargs)
+
+        return self
+
+    def predict(self, X: pd.DataFrame) -> Union[pd.DataFrame, np.ndarray]:
+        """
+        Return predictions for the input data
+
+        Args:
+            X (pd.DataFrame): A dataframe of shape (num_row, num_features) for model to predict
+
+        Returns:
+            Array of shape (num_row,), or probability dataframe if `return_prob` is set
+
+        Raises:
+            ValueError: if CPDs are empty
+
+        """
+        if self.bn.cpds == {}:
+            raise ValueError("No CPDs found. The model has not been fitted")
+
+        X = self._discretise_features(X)
+
+        if self.return_prob:
+            pred = self.bn.predict_probability(X, self._target_name)
+        else:
+            pred = self.bn.predict(X, self._target_name).to_numpy().reshape(-1)
+
+        return pred
diff --git a/tests/conftest.py b/tests/conftest.py
index 9996b06..03702a5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -32,7 +32,9 @@
 import pandas as pd
 import pytest
 from pgmpy.models import BayesianModel
+from sklearn.datasets import load_iris
 
+from causalnex.discretiser import Discretiser
 from causalnex.network import BayesianNetwork
 from causalnex.structure import StructureModel
 from causalnex.structure.notears import from_pandas
@@ -1034,3 +1036,37 @@ def adjacency_mat_num_stability() -> np.ndarray:
         ]
     )
     return W
+
+
+@pytest.fixture
+def iris_test_data() -> pd.DataFrame:
+    """
+    Iris dataset to test sklearn wrappers
+    """
+    iris = load_iris()
+    X, y = iris["data"], iris["target"]
+    names = iris["feature_names"]
+    df = pd.DataFrame(X, columns=names)
+    df["type"] = y
+    df["sepal length (cm)"] = Discretiser(
+        method="quantile", num_buckets=3
+    ).fit_transform(df["sepal length (cm)"].values)
+
+    return df
+
+
+@pytest.fixture
+def iris_edge_list():
+    """
+    Edge list to construct bayesian network for iris data
+    """
+    edge_list = [
+        ("sepal width (cm)", "sepal length (cm)"),
+        ("petal length (cm)", "sepal length (cm)"),
+        ("petal length (cm)", "sepal width (cm)"),
+        ("petal width (cm)", "petal length (cm)"),
+        ("type", "sepal width (cm)"),
+        ("type", "petal width (cm)"),
+    ]
+
+    return edge_list
diff --git a/tests/discretiser/test_decision_tree.py b/tests/discretiser/test_decision_tree.py
index a246e58..a688722 100644
--- a/tests/discretiser/test_decision_tree.py
+++ b/tests/discretiser/test_decision_tree.py
@@ -178,7 +178,6 @@ def test_keep_unselected_feature(self, get_iris_data):
             target_continuous=False,
             target="target",
         )
-
         output = tree_discretiser.transform(get_iris_data[["sepal width (cm)"]]).values
         assert (ground_truth == output.reshape(-1, 15)).all()
 
diff --git a/tests/test_network_model.py b/tests/test_network_model.py
new file mode 100644
index 0000000..be1075f
--- /dev/null
+++ b/tests/test_network_model.py
@@ -0,0 +1,298 @@
+# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
+# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
+# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
+# (either separately or in combination, "QuantumBlack Trademarks") are
+# trademarks of QuantumBlack. The License does not grant you any right or
+# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
+# Trademarks or any confusingly similar mark as a trademark for your product,
+#     or use the QuantumBlack Trademarks in any other manner that might cause
+# confusion in the marketplace, including but not limited to in advertising,
+# on websites, or on software.
+#
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import numpy as np
+import pytest
+
+from causalnex.network.sklearn import BayesianNetworkClassifier
+
+
+class TestBayesianCPDs:
+    def test_default_params(self):
+        edge_list = [
+            ("b", "a"),
+            ("b", "c"),
+            ("d", "a"),
+            ("d", "c"),
+            ("d", "b"),
+            ("e", "c"),
+            ("e", "b"),
+        ]
+        clf = BayesianNetworkClassifier(edge_list)
+        params = clf.get_params()
+        assert params["discretiser_alg"] == {}
+        assert params["probability_kwargs"]["method"] == "BayesianEstimator"
+        assert params["probability_kwargs"]["bayes_prior"] == "K2"
+        assert params["discretiser_kwargs"] == {}
+
+    def test_predict_quantile(self, iris_test_data, iris_edge_list):
+        df = iris_test_data.copy()
+        ground_truth = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [2, 2, 2, 1, 1, 1, 2, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [2, 1, 2, 1, 1, 1, 1, 2, 1, 1],
+                [1, 1, 1, 2, 1, 2, 1, 1, 1, 1],
+                [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+                [2, 2, 2, 2, 2, 2, 1, 2, 2, 2],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+                [2, 2, 2, 2, 2, 2, 1, 2, 2, 2],
+                [2, 2, 2, 2, 2, 2, 2, 2, 1, 2],
+                [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
+            ]
+        )
+
+        discretiser_params = {
+            "sepal width (cm)": {"method": "quantile", "num_buckets": 3},
+            "petal length (cm)": {"method": "quantile", "num_buckets": 3},
+            "petal width (cm)": {"method": "quantile", "num_buckets": 3},
+        }
+
+        label = df["sepal length (cm)"]
+        df.drop(["sepal length (cm)"], axis=1, inplace=True)
+        clf = BayesianNetworkClassifier(
+            iris_edge_list,
+            discretiser_kwargs=discretiser_params,
+            discretiser_alg={
+                "sepal width (cm)": "unsupervised",
+                "petal length (cm)": "unsupervised",
+                "petal width (cm)": "unsupervised",
+            },
+        )
+        clf.fit(df, label)
+        output = clf.predict(df)
+        assert np.array_equal(output.reshape(15, -1), ground_truth)
+
+    def test_predict_fixed(self, iris_test_data, iris_edge_list):
+        df = iris_test_data.copy()
+
+        ground_truth = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [2, 2, 2, 1, 1, 1, 2, 0, 1, 1],
+                [0, 2, 1, 1, 0, 2, 2, 1, 1, 1],
+                [2, 1, 1, 1, 1, 2, 1, 2, 1, 0],
+                [1, 1, 1, 1, 2, 2, 2, 1, 2, 1],
+                [1, 2, 1, 0, 1, 2, 1, 1, 0, 1],
+                [2, 1, 2, 1, 2, 2, 1, 1, 1, 2],
+                [2, 1, 2, 1, 1, 2, 2, 2, 1, 1],
+                [2, 1, 1, 1, 2, 2, 1, 2, 1, 2],
+                [1, 2, 1, 1, 1, 2, 2, 2, 2, 2],
+                [2, 2, 1, 2, 2, 2, 1, 2, 2, 2],
+            ]
+        )
+
+        discretiser_params = {
+            "sepal width (cm)": {"method": "fixed", "numeric_split_points": [3]},
+            "petal length (cm)": {"method": "fixed", "numeric_split_points": [3.7]},
+            "petal width (cm)": {"method": "fixed", "numeric_split_points": [1.2]},
+        }
+
+        label = df["sepal length (cm)"]
+        df.drop(["sepal length (cm)"], axis=1, inplace=True)
+        clf = BayesianNetworkClassifier(
+            iris_edge_list,
+            discretiser_kwargs=discretiser_params,
+            discretiser_alg={
+                "sepal width (cm)": "unsupervised",
+                "petal length (cm)": "unsupervised",
+                "petal width (cm)": "unsupervised",
+            },
+        )
+        clf.fit(df, label)
+        output = clf.predict(df)
+        assert np.array_equal(output.reshape(15, -1), ground_truth)
+
+    def test_return_probability(self, iris_test_data, iris_edge_list):
+        df = iris_test_data.copy()
+        # Discretise every remaining feature with a single fixed split point
+        discretiser_params = {
+            "sepal width (cm)": {"method": "fixed", "numeric_split_points": [3]},
+            "petal length (cm)": {"method": "fixed", "numeric_split_points": [3.7]},
+            "petal width (cm)": {"method": "fixed", "numeric_split_points": [1.2]},
+        }
+
+        label = df["sepal length (cm)"]
+        df.drop(["sepal length (cm)"], axis=1, inplace=True)
+        clf = BayesianNetworkClassifier(
+            iris_edge_list,
+            discretiser_kwargs=discretiser_params,
+            discretiser_alg={
+                "sepal width (cm)": "unsupervised",
+                "petal length (cm)": "unsupervised",
+                "petal width (cm)": "unsupervised",
+            },
+            return_prob=True,
+        )
+        clf.fit(df, label)
+        output = clf.predict(df.iloc[0:1])  # one row in, one row of probabilities out
+        assert len(list(output)) == 3
+        assert math.isclose(
+            output["sepal length (cm)_0"].values[0], 0.764706, abs_tol=1e-3
+        )
+        assert math.isclose(
+            output["sepal length (cm)_1"].values[0], 0.215686, abs_tol=1e-3
+        )
+
+    def test_no_fit(self, iris_test_data, iris_edge_list):
+        df = iris_test_data.copy()
+        df.drop(["sepal length (cm)"], axis=1, inplace=True)
+        clf = BayesianNetworkClassifier(iris_edge_list)
+        with pytest.raises(
+            ValueError,
+            match="No CPDs found. The model has not been fitted",
+        ):
+            clf.predict(df)
+
+    def test_dt_discretiser(self, iris_test_data, iris_edge_list):
+        df = iris_test_data.copy()
+        ground_truth = np.array(
+            [
+                [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
+                [1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
+                [2, 2, 2, 1, 1, 1, 2, 0, 1, 1],
+                [0, 1, 1, 1, 1, 2, 1, 1, 1, 1],
+                [2, 1, 1, 1, 1, 1, 1, 1, 1, 0],
+                [1, 1, 1, 1, 1, 1, 2, 1, 1, 1],
+                [1, 1, 1, 0, 1, 1, 1, 1, 0, 1],
+                [2, 1, 2, 2, 2, 2, 1, 2, 2, 2],
+                [2, 2, 2, 1, 1, 2, 2, 2, 2, 1],
+                [2, 1, 2, 1, 2, 2, 1, 1, 2, 2],
+                [2, 2, 2, 1, 2, 2, 2, 2, 1, 2],
+                [2, 2, 1, 2, 2, 2, 1, 2, 2, 1],
+            ]
+        )
+        supervised_param = {
+            "sepal width (cm)": {"max_depth": 2, "random_state": 2020},
+            "petal length (cm)": {"max_depth": 2, "random_state": 2020},
+            "petal width (cm)": {"max_depth": 2, "random_state": 2020},
+        }
+
+        label = df["sepal length (cm)"]
+        df.drop(["sepal length (cm)"], axis=1, inplace=True)
+        clf = BayesianNetworkClassifier(
+            iris_edge_list,
+            discretiser_kwargs=supervised_param,
+            discretiser_alg={
+                "sepal width (cm)": "tree",
+                "petal length (cm)": "tree",
+                "petal width (cm)": "tree",
+            },
+        )
+        clf.fit(df, label)
+        output = clf.predict(df)
+        assert np.array_equal(output.reshape(15, -1), ground_truth)
+
+    def test_mdlp_discretiser(self, iris_test_data, iris_edge_list):
+        df = iris_test_data.copy()
+        ground_truth = np.array(
+            [
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+                [2, 2, 2, 1, 1, 1, 1, 0, 1, 0],
+                [0, 1, 1, 1, 0, 2, 1, 1, 1, 0],
+                [2, 1, 1, 1, 1, 1, 1, 1, 1, 0],
+                [0, 0, 0, 2, 1, 1, 2, 1, 1, 1],
+                [1, 1, 1, 0, 1, 1, 1, 1, 0, 1],
+                [2, 2, 2, 2, 2, 2, 1, 2, 2, 2],
+                [2, 2, 2, 1, 2, 2, 2, 2, 2, 1],
+                [2, 1, 2, 1, 2, 2, 1, 1, 2, 2],
+                [2, 2, 2, 2, 2, 2, 2, 2, 1, 2],
+                [2, 2, 2, 2, 2, 2, 1, 2, 2, 2],
+            ]
+        )
+        supervised_param = {
+            "sepal width (cm)": {"min_depth": 0, "random_state": 2020},
+            "petal length (cm)": {"min_depth": 0, "random_state": 2020},
+            "petal width (cm)": {"min_depth": 0, "random_state": 2020},
+        }
+        label = df["sepal length (cm)"]
+        df.drop(["sepal length (cm)"], axis=1, inplace=True)
+        clf = BayesianNetworkClassifier(
+            iris_edge_list,
+            discretiser_alg={
+                "sepal width (cm)": "mdlp",
+                "petal length (cm)": "mdlp",
+                "petal width (cm)": "mdlp",
+            },
+            discretiser_kwargs=supervised_param,
+        )
+        clf.fit(df, label)
+        output = clf.predict(df)
+        assert np.array_equal(output.reshape(15, -1), ground_truth)
+
+    def test_invalid_algorithm(self, iris_edge_list):
+
+        with pytest.raises(
+            KeyError, match="Some discretiser algorithms are not supported"
+        ):
+            BayesianNetworkClassifier(
+                iris_edge_list,
+                discretiser_alg={
+                    "sepal width (cm)": "invalid",
+                    "petal length (cm)": "invalid",
+                    "petal width (cm)": "mdlp",
+                },
+            )
+
+    def test_missing_kwargs(self, iris_edge_list):
+        supervised_param = {
+            "sepal width (cm)": {"min_depth": 0, "random_state": 2020},
+            "petal length (cm)": {"min_depth": 0, "random_state": 2020},
+        }
+        discretiser_alg = {
+            "sepal width (cm)": "tree",
+            "petal length (cm)": "tree",
+            "petal width (cm)": "mdlp",
+        }
+        with pytest.raises(
+            ValueError,
+            match="discretiser_alg and discretiser_kwargs should have the same keys",
+        ):
+            BayesianNetworkClassifier(
+                iris_edge_list,
+                discretiser_alg=discretiser_alg,
+                discretiser_kwargs=supervised_param,
+            )

From c9f306ec4571dbb61586ee97f39c80612eb861e6 Mon Sep 17 00:00:00 2001
From: Philip Pilgerstorfer <34248114+qbphilip@users.noreply.github.com>
Date: Tue, 11 May 2021 17:54:22 +0100
Subject: [PATCH 3/4] add MANIFEST.in (#160)

* add MANIFEST.in

* newline

* add release note

Co-authored-by: philip_pilgerstorfer <philip.pilgerstorfer!@quantumblack.com>
---
 MANIFEST.in | 5 +++++
 RELEASE.md  | 1 +
 2 files changed, 6 insertions(+)
 create mode 100644 MANIFEST.in

diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..3f30d1b
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,5 @@
+include README.md
+include LICENSE.md
+include legal_header.txt
+include requirements.txt
+include test_requirements.txt
diff --git a/RELEASE.md b/RELEASE.md
index f900934..16ddf87 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -7,6 +7,7 @@
 * Add utility function to extract Markov blanket from a Bayesian Network
 * Support receiving a list of inputs for `InferenceEngine` with a multiprocessing option
 * Add supervised discretisation strategies using Decision Tree and MDLP algorithms
+* Added manifest files to ensure requirements and licenses are packaged
 
 # Release 0.9.2
 * Remove Boston housing dataset from "sklearn tutorial", see #91 for more information.

From d5ef28f1a12f3e6c74db6a45e5d8d1a6288d09a3 Mon Sep 17 00:00:00 2001
From: Philip Pilgerstorfer <34248114+qbphilip@users.noreply.github.com>
Date: Tue, 11 May 2021 18:47:46 +0100
Subject: [PATCH 4/4] Inofficial python 3.9 support (#161)

Co-authored-by: philip_pilgerstorfer <philip.pilgerstorfer!@quantumblack.com>
---
 RELEASE.md                                   |  2 ++
 causalnex/network/sklearn/models.py          |  3 ++-
 causalnex/structure/pytorch/sklearn/_base.py | 16 ++++++++--------
 causalnex/structure/pytorch/sklearn/clf.py   |  6 +++---
 causalnex/structure/pytorch/sklearn/reg.py   |  4 ++--
 causalnex/utils/network_utils.py             |  2 +-
 requirements.txt                             |  6 +++---
 tests/structure/test_sklearn.py              | 20 ++++++++++++++++++++
 8 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 16ddf87..eeb3ac1 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -8,6 +8,8 @@
 * Support receiving a list of inputs for `InferenceEngine` with a multiprocessing option
 * Add supervised discretisation strategies using Decision Tree and MDLP algorithms
 * Added manifest files to ensure requirements and licenses are packaged
+* Fix estimator issues with sklearn ("unofficial python 3.9 support", doesn't work with `discretiser` option)
+* Minor bumps in dependency versions, remove prettytable as dependency
 
 # Release 0.9.2
 * Remove Boston housing dataset from "sklearn tutorial", see #91 for more information.
diff --git a/causalnex/network/sklearn/models.py b/causalnex/network/sklearn/models.py
index 3e76408..98f925e 100644
--- a/causalnex/network/sklearn/models.py
+++ b/causalnex/network/sklearn/models.py
@@ -158,7 +158,8 @@ def __init__(
 
         self._validate_discretiser(discretiser_alg, discretiser_kwargs)
 
-        self.structure = StructureModel(list_of_edges)
+        self.list_of_edges = list_of_edges
+        self.structure = StructureModel(self.list_of_edges)
         self.bn = BayesianNetwork(self.structure)
         self.return_prob = return_prob
         self.probability_kwargs = probability_kwargs
diff --git a/causalnex/structure/pytorch/sklearn/_base.py b/causalnex/structure/pytorch/sklearn/_base.py
index 3c86528..adbee21 100644
--- a/causalnex/structure/pytorch/sklearn/_base.py
+++ b/causalnex/structure/pytorch/sklearn/_base.py
@@ -76,7 +76,7 @@ def __init__(
         enforce_dag: bool = False,
         standardize: bool = False,
         target_dist_type: str = None,
-        **kwargs,
+        notears_mlp_kwargs: Dict = None,
     ):
         """
         Args:
@@ -120,7 +120,7 @@ def __init__(
             The L-BFGS algorithm used to fit the underlying NOTEARS works best on data
             all of the same scale so this parameter is reccomended.
 
-            kwargs: Extra arguments passed to the NOTEARS from_pandas function.
+            notears_mlp_kwargs: Additional arguments for the NOTEARS MLP model.
 
             target_dist_type: The distribution type of the target.
             Uses the same aliases as dist_type_schema.
@@ -162,8 +162,8 @@ def __init__(
         self.tabu_edges = tabu_edges
         self.tabu_parent_nodes = tabu_parent_nodes
         self.tabu_child_nodes = tabu_child_nodes
-        self._target_dist_type = target_dist_type
-        self.kwargs = kwargs
+        self.target_dist_type = target_dist_type
+        self.notears_mlp_kwargs = notears_mlp_kwargs
 
         # sklearn wrapper paramters
         self.dependent_target = dependent_target
@@ -206,14 +206,14 @@ def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray
             )
 
             # if its a continuous target also standardize
-            if self._target_dist_type == "cont":
+            if self.target_dist_type == "cont":
                 y = y.copy()
                 self._ss_y = StandardScaler()
                 y[:] = self._ss_y.fit_transform(y.values.reshape(-1, 1)).reshape(-1)
 
         # add the target to the dist_type_schema
         # NOTE: this must be done AFTER standardize
-        dist_type_schema[y.name] = self._target_dist_type
+        dist_type_schema[y.name] = self.target_dist_type
 
         # preserve the feature and target colnames
         self._features = tuple(X.columns)
@@ -242,7 +242,7 @@ def fit(self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray
             tabu_parent_nodes=tabu_parent_nodes,
             tabu_child_nodes=self.tabu_child_nodes,
             use_bias=self.fit_intercept,
-            **self.kwargs,
+            **(self.notears_mlp_kwargs or {}),
         )
 
         # keep thresholding until the DAG constraint is enforced
@@ -284,7 +284,7 @@ def predict(self, X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
         y_pred = target_dist_type.get_columns(X_hat)
 
         # inverse-standardize
-        if self.standardize and self._target_dist_type == "cont":
+        if self.standardize and self.target_dist_type == "cont":
             y_pred = self._ss_y.inverse_transform(y_pred.reshape(-1, 1)).reshape(-1)
 
         return y_pred
diff --git a/causalnex/structure/pytorch/sklearn/clf.py b/causalnex/structure/pytorch/sklearn/clf.py
index a1a07a5..752e158 100644
--- a/causalnex/structure/pytorch/sklearn/clf.py
+++ b/causalnex/structure/pytorch/sklearn/clf.py
@@ -83,7 +83,7 @@ def fit(
         Fits the sm model using the concat of X and y.
 
         Raises:
-            NotImplementedError: If unsupported _target_dist_type provided.
+            NotImplementedError: If unsupported target_dist_type provided.
             ValueError: If less than 2 classes provided.
 
         Returns:
@@ -109,8 +109,8 @@ def fit(
             )
 
-        # store the protected attr _target_dist_type
+        # default target_dist_type from the number of classes when not provided
-        if self._target_dist_type is None:
-            self._target_dist_type = "cat" if n_classes > 2 else "bin"
+        if self.target_dist_type is None:
+            self.target_dist_type = "cat" if n_classes > 2 else "bin"
 
         # fit the NOTEARS model
         super().fit(X, y)
diff --git a/causalnex/structure/pytorch/sklearn/reg.py b/causalnex/structure/pytorch/sklearn/reg.py
index e4a8227..2d80358 100644
--- a/causalnex/structure/pytorch/sklearn/reg.py
+++ b/causalnex/structure/pytorch/sklearn/reg.py
@@ -86,8 +86,8 @@ def fit(
         """
 
-        # store the protected attr _target_dist_type
+        # default target_dist_type to continuous when not provided
-        if self._target_dist_type is None:
-            self._target_dist_type = "cont"
+        if self.target_dist_type is None:
+            self.target_dist_type = "cont"
 
         # fit the NOTEARS model
         super().fit(X, y)
diff --git a/causalnex/utils/network_utils.py b/causalnex/utils/network_utils.py
index ac19c06..8d33ca6 100644
--- a/causalnex/utils/network_utils.py
+++ b/causalnex/utils/network_utils.py
@@ -26,7 +26,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-This module contains the helpfer functions for interacting with Bayesian Network
+This module contains the helper functions for interacting with Bayesian Network
 """
 
 from copy import deepcopy
diff --git a/requirements.txt b/requirements.txt
index b4c6a17..50752ab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,9 +2,9 @@ networkx~=2.5
 numpy>=1.14.2, <2.0
 pandas>=1.0, <2.0
 pathos>=0.2.7, <0.3.0
-pgmpy>=0.1.12,<0.2.0
-prettytable>=0.7.2, <0.8
-scikit-learn>=0.20.2, <0.23.0, !=0.22.2.post1
+pgmpy>=0.1.12, <0.2.0
+scikit-learn>=0.22.0, <0.25.0, !=0.22.2.post1, !=0.24.1; python_version < '3.9'
+scikit-learn>=0.24.0, <0.25.0, !=0.24.1; python_version == '3.9'
 scipy>=1.2.0, <1.6
 torch~=1.7
 wrapt>=1.11.0, <1.12
diff --git a/tests/structure/test_sklearn.py b/tests/structure/test_sklearn.py
index 21790c3..a28c2ef 100644
--- a/tests/structure/test_sklearn.py
+++ b/tests/structure/test_sklearn.py
@@ -243,6 +243,16 @@ def test_feature_importances(self, hidden_layer_units):
         # assert that the sign of the coefficient is positive for both nonlinear and linear cases
         assert coef_["true_feat"] > 0
 
+    def test_sklearn_compatibility_reg(self):
+        reg = DAGRegressor(
+            alpha=0.0,
+            fit_intercept=True,
+            dependent_target=True,
+            hidden_layer_units=[0],
+            standardize=True,
+        )
+        reg.get_params(deep=True)
+
     @pytest.mark.parametrize(
         "standardize",
         [
@@ -463,6 +473,16 @@ def test_glm(self, target_dist_type, y):
         clf.fit(X, y)
         clf.predict(X)
 
+    def test_sklearn_compatibility_clf(self):
+        clf = DAGClassifier(
+            alpha=0.0,
+            fit_intercept=True,
+            dependent_target=True,
+            hidden_layer_units=[0],
+            standardize=True,
+        )
+        clf.get_params(deep=True)
+
 
 @pytest.mark.parametrize("hidden_layer_units", [None, [1], [5], [5, 3], [10, 10]])
 def test_independent_predictions(hidden_layer_units):