Skip to content

Commit

Permalink
Merge pull request #111 from quantumblacklabs/release/0.10.0
Browse files Browse the repository at this point in the history
Release/0.10.0
  • Loading branch information
qbphilip authored May 11, 2021
2 parents b64dab0 + 2f57e47 commit b6a399f
Show file tree
Hide file tree
Showing 30 changed files with 2,058 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ repos:
exclude: ^causalnex/ebaybbn

- repo: https://github.com/psf/black
rev: stable
rev: 20.8b1
hooks:
- id: black

Expand Down
5 changes: 5 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
include README.md
include LICENSE.md
include legal_header.txt
include requirements.txt
include test_requirements.txt
11 changes: 11 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Upcoming release

# Release 0.10.0
* Add supervised discretisation strategies using Decision Tree and MDLP algorithms.
* Add `BayesianNetworkClassifier`, an sklearn-compatible class for fitting and predicting probabilities in a BN.
* Fixes cyclical import of `causalnex.plots`, as per #106.
* Add utility function to extract Markov blanket from a Bayesian Network
* Support receiving a list of inputs for `InferenceEngine` with a multiprocessing option
* Add supervised discretisation strategies using Decision Tree and MDLP algorithms
* Added manifest files to ensure requirements and licenses are packaged
* Fix estimator issues with sklearn ("unofficial python 3.9 support", doesn't work with `discretiser` option)
* Minor bumps in dependency versions, remove prettytable as dependency

# Release 0.9.2
* Remove Boston housing dataset from "sklearn tutorial", see #91 for more information.
* Update pylint version to 2.7
Expand Down
2 changes: 1 addition & 1 deletion causalnex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
causalnex toolkit for causal reasoning (Bayesian Networks / Inference)
"""

__version__ = "0.9.2"
__version__ = "0.10.0"

__all__ = ["structure", "discretiser", "evaluation", "inference", "network", "plots"]
10 changes: 9 additions & 1 deletion causalnex/discretiser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@
``causalnex.discretiser`` provides functionality to discretise data.
"""

__all__ = ["Discretiser"]
# Public names of the ``causalnex.discretiser`` package; each entry is
# imported below from ``.discretiser`` or ``.discretiser_strategy``.
__all__ = [
"Discretiser",
"DecisionTreeSupervisedDiscretiserMethod",
"MDLPSupervisedDiscretiserMethod",
]

from .discretiser import Discretiser
from .discretiser_strategy import (
DecisionTreeSupervisedDiscretiserMethod,
MDLPSupervisedDiscretiserMethod,
)
114 changes: 114 additions & 0 deletions causalnex/discretiser/abstract_discretiser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Copyright 2019-2020 QuantumBlack Visual Analytics Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
#
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tools to help discretise data."""

import logging
from abc import ABC, abstractmethod
from typing import List

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator


class AbstractSupervisedDiscretiserMethod(BaseEstimator, ABC):
    """
    Base class for advanced discretisation methods.

    Concrete subclasses implement ``fit``. ``transform`` relies on two
    attributes of the instance:

    - ``map_thresholds``: maps a column name to the split points passed to
      ``np.digitize`` for that column.
    - ``feat_names``: the feature names considered for discretisation, or
      ``None`` while nothing has been registered (presumably set by ``fit``
      in subclasses - confirm against the concrete strategies).
    """

    def __init__(self):
        # Column name -> split points used by ``_transform_one_column``.
        self.map_thresholds = {}
        # Stays ``None`` until a subclass assigns the feature names.
        self.feat_names = None

    @abstractmethod
    def fit(
        self,
        feat_names: List[str],
        target: str,
        dataframe: pd.DataFrame,
        target_continuous: bool,
    ):
        """
        Discretise the features in `feat_names` in such a way that maximises the prediction of `target`.

        Args:
            feat_names (List[str]): List of feature names to be discretised.
            target (str): Name of the target variable - the node that adjusts how `feat_names` will be discretised
            dataframe: The full dataset prior to discretisation.
            target_continuous (bool): Boolean indicates if target variable is continuous

        Raises:
            NotImplementedError: AbstractSupervisedDiscretiserMethod should not be called directly
        """
        raise NotImplementedError("The method is not implemented")

    def _transform_one_column(self, dataframe_one_column: pd.DataFrame) -> np.ndarray:
        """
        Given one "original" feature (continuous), discretise it.

        A column with an entry in ``map_thresholds`` is binned with
        ``np.digitize``. Any other column is returned unchanged; a warning is
        logged when the column is not listed in ``feat_names`` (including the
        case where ``feat_names`` is still ``None``).

        Args:
            dataframe_one_column: dataframe with a single continuous feature, to be transformed into discrete

        Returns:
            Discrete feature, as an np.ndarray of shape (len(df),)
        """
        col_name = list(dataframe_one_column.columns)[0]
        flat_values = dataframe_one_column.values.reshape(-1)

        if col_name in self.map_thresholds:
            return np.digitize(flat_values, self.map_thresholds[col_name])

        # ``feat_names`` is None until it has been assigned; a membership test
        # against None would raise TypeError, so treat None as "no features
        # registered" and fall through to the usual pass-through behaviour.
        if self.feat_names is None or col_name not in self.feat_names:
            logging.warning(
                "%s is not in feat_names. The column is left unchanged", col_name
            )
        return flat_values

    def transform(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Given one "original" dataframe, discretise it.

        Every column is passed through ``_transform_one_column`` and the
        results are assembled into a new dataframe. The result is built from
        plain arrays, so it carries a default integer index rather than the
        index of ``data``.

        Args:
            data: dataframe with continuous features, to be transformed into discrete

        Returns:
            discretised version of the input data
        """
        outputs = {
            col: self._transform_one_column(data[[col]]) for col in data.columns
        }
        return pd.DataFrame.from_dict(outputs)

    def fit_transform(self, *args, **kwargs):
        """
        Unsupported shortcut; fitting and transforming must be done separately.

        Raises:
            NotImplementedError: fit_transform is not implemented
        """
        raise NotImplementedError(
            "fit_transform is not implemented. Please use .fit() and .transform() separately"
        )
Loading

0 comments on commit b6a399f

Please sign in to comment.