From 5a19fc67657ce96aa58fb78fa84035100d7abcc4 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Mon, 11 Sep 2023 15:08:51 -0400 Subject: [PATCH 01/70] ENH initialize with MIRF_AUC and MIRF_MV Co-Authored-By: Sambit Panda <36676569+sampan501@users.noreply.github.com> Co-Authored-By: Yuxin <99897042+YuxinB@users.noreply.github.com> Co-Authored-By: Adam Li <3460267+adam2392@users.noreply.github.com> --- sktree/stats/__init__.py | 1 + sktree/stats/_might.py | 288 +++++++++++++++++++++++++++++++++++++++ sktree/stats/meson.build | 10 ++ 3 files changed, 299 insertions(+) create mode 100644 sktree/stats/__init__.py create mode 100644 sktree/stats/_might.py create mode 100644 sktree/stats/meson.build diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py new file mode 100644 index 000000000..d27397e5f --- /dev/null +++ b/sktree/stats/__init__.py @@ -0,0 +1 @@ +from ._might import MIRF_AUC, MIRF_MV diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py new file mode 100644 index 000000000..860b210e1 --- /dev/null +++ b/sktree/stats/_might.py @@ -0,0 +1,288 @@ +import numpy as np +from joblib import Parallel, delayed +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import train_test_split + +from ..ensemble import HonestForestClassifier + + +def auc_calibrator(tree, X, y, test_size=0.2, permute_y=False): + indices = np.arange(X.shape[0]) + X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split( + X, y, indices, test_size=test_size + ) + + # individual tree permutation of y labels + if permute_y: + y_train = np.random.permutation(y_train) + + tree.fit(X_train, y_train) + y_pred = tree.predict_proba(X_test)[:, 1] + + # Fill test set posteriors & set rest NaN + posterior = np.full(y.shape, np.nan) + posterior[indices_test] = y_pred + + return posterior + + +def perm_stat(clf, x, z, y, random_state=None): + permuted_Z = np.random.permutation(z) + X_permutedZ = np.hstack((x, permuted_Z)) + perm_stat = 
clf.statistic(X_permutedZ, y) + return perm_stat + + +def perm_half(clf, z, y, x_pos): + permuted_Z = np.random.permutation(z) + perm_stat, perm_pos = clf.statistic(permuted_Z, y, return_pos=True) + null_pos = forest_pos(x_pos + perm_pos, y) + null_stat = roc_auc_score(null_pos[:, 0], null_pos[:, 1], max_fpr=clf.limit) + + return null_stat + + +def pos_diff(observe_pos, perm_pos, limit): + total_pos = np.random.shuffle(np.concatenate((observe_pos, perm_pos))) + + half_ind = len(total_pos) * 0.5 + half_pos = total_pos[:half_ind] + end_pos = total_pos[half_ind:] + + half_pos_final = forest_pos(half_pos, y) + half_stat = roc_auc_score(half_pos_final[:, 0], half_pos_final[:, 1], max_fpr=limit) + + end_pos_final = forest_pos(end_pos, y) + end_stat = roc_auc_score(end_pos_final[:, 0], end_pos_final[:, 1], max_fpr=limit) + + return abs(half_stat - end_stat) + + +def forest_pos(posterior, y): + # Average all posteriors + posterior_final = np.nanmean(posterior, axis=0) + + # Ignore all NaN values (samples not tested) + true_final = y.ravel()[~np.isnan(posterior_final)].reshape(-1, 1) + posterior_final = posterior_final[~np.isnan(posterior_final)].reshape(-1, 1) + + return np.hstack((true_final, posterior_final)) + + +class MIRF_AUC: + def __init__( + self, + n_estimators=500, + criterion="gini", + splitter="best", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + class_weight=None, + ccp_alpha=0.0, + max_samples=None, + honest_prior="empirical", + honest_fraction=0.5, + tree_estimator=None, + limit=0.05, + ): + self.clf = HonestForestClassifier( + n_estimators=n_estimators, + criterion=criterion, + splitter=splitter, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + 
min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + class_weight=class_weight, + ccp_alpha=ccp_alpha, + max_samples=max_samples, + honest_prior=honest_prior, + honest_fraction=honest_fraction, + tree_estimator=tree_estimator, + ) + self.limit = limit + + def statistic( + self, + x, + y, + workers=1, + test_size=0.2, + initial=True, + return_pos=False, + permute_y=False, + ): + # Initialize trees + if initial: + self.clf.fit(x[0:2], y.ravel()[0:2]) + + # Compute posteriors with train test splits + posterior = Parallel(n_jobs=workers)( + delayed(auc_calibrator)(tree, x, y.ravel(), test_size, permute_y) + for tree in (self.clf.estimators_) + ) + + posterior_final = forest_pos(posterior, y) + self.stat = roc_auc_score(posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit) + + if return_pos: + return self.stat, posterior + + return self.stat + + +class MIRF_MV: + def __init__( + self, + n_estimators=500, + criterion="gini", + splitter="best", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + class_weight=None, + ccp_alpha=0.0, + max_samples=None, + honest_prior="empirical", + honest_fraction=0.5, + tree_estimator=None, + limit=0.05, + ): + self.clf = HonestForestClassifier( + n_estimators=n_estimators, + criterion=criterion, + splitter=splitter, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + 
min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + class_weight=class_weight, + ccp_alpha=ccp_alpha, + max_samples=max_samples, + honest_prior=honest_prior, + honest_fraction=honest_fraction, + tree_estimator=tree_estimator, + ) + self.limit = limit + + def statistic( + self, + x, + y, + workers=1, + test_size=0.2, + initial=True, + return_pos=False, + permute_y=False, + ): + # Initialize trees + if initial: + self.clf.fit(x[0:2], y.ravel()[0:2]) + + # Compute posteriors with train test splits + posterior = Parallel(n_jobs=workers)( + delayed(auc_calibrator)(tree, x, y.ravel(), test_size, permute_y) + for tree in (self.clf.estimators_) + ) + + # Average all posteriors + posterior_final = np.nanmean(posterior, axis=0) + + # Ignore all NaN values (samples not tested) + true_final = y.ravel()[~np.isnan(posterior_final)] + posterior_final = posterior_final[~np.isnan(posterior_final)] + + self.stat = roc_auc_score(true_final, posterior_final, max_fpr=self.limit) + + if return_pos: + return self.stat, posterior + + return self.stat + + def test(self, x, z, y, reps=1000, workers=1, random_state=None): + XZ = np.hstack((x, z)) + observe_stat = self.statistic(XZ, y) + + null_dist = np.array( + Parallel(n_jobs=workers)([delayed(perm_stat)(self, x, z, y) for _ in range(reps)]) + ) + pval = (1 + (null_dist >= observe_stat).sum()) / (1 + reps) + + return observe_stat, null_dist, pval + + def test_twin(self, x, z, y, reps=1000, workers=1, random_state=None): + x_stat, x_pos = self.statistic(x, y, return_pos=True) + + # TODO: determine whether we need the forest + + z_stat, z_pos = self.statistic(z, y, return_pos=True) + + observe_pos = forest_pos(x_pos + z_pos, y) + observe_stat = roc_auc_score(observe_pos[:, 0], observe_pos[:, 1], max_fpr=self.limit) + + null_dist = np.array( + Parallel(n_jobs=workers)([delayed(perm_half)(self, z, y, x_pos) for _ 
in range(reps)]) + ) + pval = (1 + (null_dist >= observe_stat).sum()) / (1 + reps) + + return observe_stat, null_dist, pval + + def test_diff(self, x, z, y, reps=1000, workers=1): + XZ = np.hstack((x, z)) + observe_stat, observe_pos = self.statistic(XZ, y, return_pos=True) + + # Compute statistic for permuted sets + permuted_Z = np.random.permutation(z) + X_permutedZ = np.hstack((x, permuted_Z)) + perm_stat, perm_pos = self.statistic(X_permutedZ, y, return_pos=True) + + # Boostrap sample the posterior from the two forests + null_stats = np.array( + Parallel(n_jobs=workers)( + [delayed(pos_diff)(observe_pos, perm_pos, limit=self.limit) for _ in range(reps)] + ) + ) + + stat = observe_stat - perm_stat + + pval = (1 + (null_stats >= stat).sum()) / (1 + reps) + return stat, null_stats, pval diff --git a/sktree/stats/meson.build b/sktree/stats/meson.build new file mode 100644 index 000000000..c21857d68 --- /dev/null +++ b/sktree/stats/meson.build @@ -0,0 +1,10 @@ +python_sources = [ + '__init__.py', + '_might.py', +] + +py3.install_sources( + python_sources, + pure: false, + subdir: 'sktree/stats' +) From f1a8e49d319faaad3a8875522635c472d96acdd2 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Mon, 11 Sep 2023 15:59:12 -0400 Subject: [PATCH 02/70] ENH add statistic alternatives --- sktree/stats/_might.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index 860b210e1..0fb8b66f2 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -1,5 +1,6 @@ import numpy as np from joblib import Parallel, delayed +from scipy.stats import entropy from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split @@ -126,6 +127,7 @@ def statistic( self, x, y, + stat="AUC", workers=1, test_size=0.2, initial=True, @@ -143,7 +145,16 @@ def statistic( ) posterior_final = forest_pos(posterior, y) - self.stat = roc_auc_score(posterior_final[:, 0], 
posterior_final[:, 1], max_fpr=self.limit) + + if stat == "AUC": + self.stat = roc_auc_score( + posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit + ) + elif stat == "MI": + H_YX = np.mean(entropy(posterior_final[:, 1], base=np.exp(1), axis=1)) + _, counts = np.unique(posterior_final[:, 0], return_counts=True) + H_Y = entropy(counts, base=np.exp(1)) + self.stat = max(H_Y - H_YX, 0) if return_pos: return self.stat, posterior @@ -208,6 +219,7 @@ def statistic( self, x, y, + stat="AUC", workers=1, test_size=0.2, initial=True, @@ -224,14 +236,17 @@ def statistic( for tree in (self.clf.estimators_) ) - # Average all posteriors - posterior_final = np.nanmean(posterior, axis=0) - - # Ignore all NaN values (samples not tested) - true_final = y.ravel()[~np.isnan(posterior_final)] - posterior_final = posterior_final[~np.isnan(posterior_final)] + posterior_final = forest_pos(posterior, y) - self.stat = roc_auc_score(true_final, posterior_final, max_fpr=self.limit) + if stat == "AUC": + self.stat = roc_auc_score( + posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit + ) + elif stat == "MI": + H_YX = np.mean(entropy(posterior_final[:, 1], base=np.exp(1), axis=1)) + _, counts = np.unique(posterior_final[:, 0], return_counts=True) + H_Y = entropy(counts, base=np.exp(1)) + self.stat = max(H_Y - H_YX, 0) if return_pos: return self.stat, posterior From fd0b937577f483b56fd6281a73621596975eff9d Mon Sep 17 00:00:00 2001 From: Sambit Panda <36676569+sampan501@users.noreply.github.com> Date: Tue, 12 Sep 2023 08:15:30 -0400 Subject: [PATCH 03/70] no axis=1 when taking posterior slice in MI --- sktree/stats/_might.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index 0fb8b66f2..3c9d79421 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -151,7 +151,7 @@ def statistic( posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit ) elif stat == "MI": - H_YX = 
np.mean(entropy(posterior_final[:, 1], base=np.exp(1), axis=1)) + H_YX = np.mean(entropy(posterior_final[:, 1], base=np.exp(1))) _, counts = np.unique(posterior_final[:, 0], return_counts=True) H_Y = entropy(counts, base=np.exp(1)) self.stat = max(H_Y - H_YX, 0) From efc258758f42daa5da776ca96726e8d95b37a641 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 09:19:03 -0400 Subject: [PATCH 04/70] ENH add y-label permutation test to MIGHT --- sktree/stats/_might.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index 3c9d79421..e27879aa3 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -28,8 +28,12 @@ def auc_calibrator(tree, X, y, test_size=0.2, permute_y=False): def perm_stat(clf, x, z, y, random_state=None): - permuted_Z = np.random.permutation(z) - X_permutedZ = np.hstack((x, permuted_Z)) + if z is not None: + permuted_Z = np.random.permutation(z) + X_permutedZ = np.hstack((x, permuted_Z)) + else: + X_permutedZ = np.random.permutation(x) + perm_stat = clf.statistic(X_permutedZ, y) return perm_stat @@ -70,7 +74,7 @@ def forest_pos(posterior, y): return np.hstack((true_final, posterior_final)) -class MIRF_AUC: +class MIGHT: def __init__( self, n_estimators=500, @@ -161,8 +165,18 @@ def statistic( return self.stat + def test(self, x, y, reps=1000, workers=1, random_state=None): + observe_stat = self.statistic(x, y) + + null_dist = np.array( + Parallel(n_jobs=workers)([delayed(perm_stat)(self, x, None, y) for _ in range(reps)]) + ) + pval = (1 + (null_dist >= observe_stat).sum()) / (1 + reps) + + return observe_stat, null_dist, pval + -class MIRF_MV: +class MIGHT_MV: def __init__( self, n_estimators=500, From d1f7748807ccf039aa429e3d708451217954faca Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 09:19:36 -0400 Subject: [PATCH 05/70] FIX rename import --- sktree/stats/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index d27397e5f..f6933ba2c 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1 +1 @@ -from ._might import MIRF_AUC, MIRF_MV +from ._might import MIGHT, MIGHT_MV From d4abb4aed585483bc190fd077ffe9d1472092fb0 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 10:16:26 -0400 Subject: [PATCH 06/70] FIX, correct function param --- sktree/stats/_might.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index e27879aa3..d76cdcbbf 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -47,7 +47,7 @@ def perm_half(clf, z, y, x_pos): return null_stat -def pos_diff(observe_pos, perm_pos, limit): +def pos_diff(observe_pos, perm_pos, y, limit): total_pos = np.random.shuffle(np.concatenate((observe_pos, perm_pos))) half_ind = len(total_pos) * 0.5 @@ -307,7 +307,7 @@ def test_diff(self, x, z, y, reps=1000, workers=1): # Boostrap sample the posterior from the two forests null_stats = np.array( Parallel(n_jobs=workers)( - [delayed(pos_diff)(observe_pos, perm_pos, limit=self.limit) for _ in range(reps)] + [delayed(pos_diff)(observe_pos, perm_pos, y, limit=self.limit) for _ in range(reps)] ) ) From a3424827840053b7643f50c4e284c87befa1e1df Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 10:42:07 -0400 Subject: [PATCH 07/70] FIX remove axis param & TST initialize test file --- sktree/stats/_might.py | 2 +- sktree/stats/meson.build | 2 ++ sktree/stats/tests/test_might.py | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 sktree/stats/tests/test_might.py diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index d76cdcbbf..ff3d9ac2b 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -257,7 +257,7 @@ def statistic( posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit ) elif stat == "MI": - H_YX = np.mean(entropy(posterior_final[:, 
1], base=np.exp(1), axis=1)) + H_YX = np.mean(entropy(posterior_final[:, 1], base=np.exp(1))) _, counts = np.unique(posterior_final[:, 0], return_counts=True) H_Y = entropy(counts, base=np.exp(1)) self.stat = max(H_Y - H_YX, 0) diff --git a/sktree/stats/meson.build b/sktree/stats/meson.build index c21857d68..d6cf64e5f 100644 --- a/sktree/stats/meson.build +++ b/sktree/stats/meson.build @@ -8,3 +8,5 @@ py3.install_sources( pure: false, subdir: 'sktree/stats' ) + +subdir('tests') diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py new file mode 100644 index 000000000..fa8db2a34 --- /dev/null +++ b/sktree/stats/tests/test_might.py @@ -0,0 +1,4 @@ +import numpy as np +import pytest + +from sktree.stats import MIGHT, MIGHT_MV From 69c76a868d10e1c9b45ddaef810347ddafbec249 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 12 Sep 2023 11:11:44 -0400 Subject: [PATCH 08/70] Adding modularity Signed-off-by: Adam Li --- doc/references.bib | 11 ++++ sktree/stats/meson.build | 1 + sktree/stats/tests/test_might.py | 0 sktree/stats/utils.py | 90 ++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+) create mode 100644 sktree/stats/tests/test_might.py create mode 100644 sktree/stats/utils.py diff --git a/doc/references.bib b/doc/references.bib index df44cfb26..58dad37dd 100644 --- a/doc/references.bib +++ b/doc/references.bib @@ -11,6 +11,17 @@ @article{breiman2001random publisher = {Springer} } +@article{coleman2022scalable, + title = {Scalable and efficient hypothesis testing with random forests}, + author = {Coleman, Tim and Peng, Wei and Mentch, Lucas}, + journal = {The Journal of Machine Learning Research}, + volume = {23}, + number = {1}, + pages = {7679--7713}, + year = {2022}, + publisher = {JMLRORG} +} + @article{Li2023manifold, author = {Li, Adam and Perry, Ronan and Huynh, Chester and Tomita, Tyler M. 
and Mehta, Ronak and Arroyo, Jesus and Patsolic, Jesse and Falk, Ben and Sarma, Sridevi and Vogelstein, Joshua}, title = {Manifold Oblique Random Forests: Towards Closing the Gap on Convolutional Deep Networks}, diff --git a/sktree/stats/meson.build b/sktree/stats/meson.build index c21857d68..b059cd516 100644 --- a/sktree/stats/meson.build +++ b/sktree/stats/meson.build @@ -1,6 +1,7 @@ python_sources = [ '__init__.py', '_might.py', + 'utils.py', ] py3.install_sources( diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py new file mode 100644 index 000000000..e69de29bb diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py new file mode 100644 index 000000000..7db2951de --- /dev/null +++ b/sktree/stats/utils.py @@ -0,0 +1,90 @@ +from numpy.typing import ArrayLike +import numpy as np + +from sklearn.metrics import mean_squared_error, roc_auc_score + +METRIC_FUNCTIONS = { + 'mse': mean_squared_error, + 'auc': roc_auc_score +} + +def pvalue(observe_stat: float, permuted_stat: ArrayLike) -> float: + """Compute pvalue with Coleman method. + + Implements the pvalue calculation from Algorithm 1. See + :footcite:`coleman2022scalable` for full details. + + Parameters + ---------- + observe_stat : float + The observed test statistic. + permuted_stat : ArrayLike of shape (n_repeats,) + The array of test statistics computed on permutations. + + Returns + ------- + pval : float + The pvalue. + """ + n_repeats = len(permuted_stat) + pval = (1 + (permuted_stat >= observe_stat).sum()) / (1 + n_repeats) + return pval + + +def compute_null_distribution(X_test, y_test, forest, perm_forest, metric: str='mse', n_repeats: int=1000, seed: int=None): + """Compute null distribution using Coleman method. + + The null distribution is comprised of two forests. 
+ + Parameters + ---------- + X_test : _type_ + _description_ + y_test : _type_ + _description_ + forest : _type_ + _description_ + perm_forest : _type_ + _description_ + metric : str, optional + _description_, by default 'mse' + n_repeats : int, optional + _description_, by default 1000 + seed : int, optional + _description_, by default None + + Returns + ------- + _type_ + _description_ + """ + rng = np.random.default_rng(seed) + + metric_func = METRIC_FUNCTIONS[metric] + + # sample two sets of equal number of trees from the combined forest + y_pred_proba_normal = forest.predict_proba(X_test) + y_pred_proba_perm = perm_forest.predict_proba(X_test) + all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0) + + n_samples = len(y_test) + + # pre-allocate memory for the index array + index_arr = np.arange(n_samples * 2, dtype=int) + + metric_star = [] + metric_star_pi = [] + for idx in range(n_repeats): + # two sets of random indices from 1 : 2N are sampled using Fisher-Yates + rng.shuffle(index_arr) + first_half_index = index_arr[:n_samples] + second_half_index = index_arr[n_samples:] + + # compute two instances of the metric from the sampled trees + first_half_metric = metric_func(y_true=y_test, y_pred=all_y_pred[first_half_index]) + second_half_metric = metric_func(y_true=y_test, y_pred=all_y_pred[second_half_index]) + + metric_star.append(first_half_metric) + metric_star_pi.append(second_half_metric) + + return metric_star, metric_star_pi \ No newline at end of file From b3ab11d4c7f0ed994ee5bf23050c342714b84cb9 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 11:29:34 -0400 Subject: [PATCH 09/70] TST experiment with unit test --- sktree/__init__.py | 3 +- sktree/meson.build | 5 ++-- sktree/stats/tests/__init__.py | 0 sktree/stats/tests/meson.build | 10 +++++++ sktree/stats/tests/test_might.py | 51 +++++++++++++++++++++++++++++++- 5 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 sktree/stats/tests/__init__.py 
create mode 100644 sktree/stats/tests/meson.build diff --git a/sktree/__init__.py b/sktree/__init__.py index 451379644..ce1c4b2e2 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -36,7 +36,7 @@ # process, as it may not be compiled yet else: try: - from . import _lib, tree, ensemble, experimental + from . import _lib, tree, ensemble, experimental, stats from ._lib.sklearn.ensemble._forest import ( RandomForestClassifier, RandomForestRegressor, @@ -68,6 +68,7 @@ "tree", "experimental", "ensemble", + "stats", "ExtraObliqueRandomForestClassifier", "ExtraObliqueRandomForestRegressor", "NearestNeighborsMetaEstimator", diff --git a/sktree/meson.build b/sktree/meson.build index 0d5518a73..70b709920 100644 --- a/sktree/meson.build +++ b/sktree/meson.build @@ -80,7 +80,8 @@ cython_c_args += ['-Wno-cpp'] cython_cpp_args = cython_c_args subdir('_lib') -subdir('tree') subdir('ensemble') subdir('experimental') -subdir('tests') \ No newline at end of file +subdir('stats') +subdir('tests') +subdir('tree') diff --git a/sktree/stats/tests/__init__.py b/sktree/stats/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sktree/stats/tests/meson.build b/sktree/stats/tests/meson.build new file mode 100644 index 000000000..376ceb35a --- /dev/null +++ b/sktree/stats/tests/meson.build @@ -0,0 +1,10 @@ +python_sources = [ + '__init__.py', + 'test_might.py', +] + +py3.install_sources( + python_sources, + pure: false, + subdir: 'sktree/stats/tests' +) diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py index fa8db2a34..7eea4c778 100644 --- a/sktree/stats/tests/test_might.py +++ b/sktree/stats/tests/test_might.py @@ -1,4 +1,53 @@ import numpy as np import pytest +from sklearn import datasets +from sklearn.utils.estimator_checks import parametrize_with_checks -from sktree.stats import MIGHT, MIGHT_MV +from sktree._lib.sklearn.tree import DecisionTreeClassifier +from sktree.stats import MIGHT +from sktree.tree import 
ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier + +# load the iris dataset +# and randomly permute it +iris = datasets.load_iris() +rng = np.random.RandomState(1) + +# remove third class +iris_X = iris.data[iris.target != 2] +iris_y = iris.target[iris.target != 2] + +p = rng.permutation(iris_X.shape[0]) +iris_X = iris_X[p] +iris_y = iris_y[p] + + +@pytest.mark.parametrize("criterion", ["gini", "entropy"]) +@pytest.mark.parametrize("max_features", [None, 2]) +@pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore", "error"]) +@pytest.mark.parametrize( + "estimator", + [ + None, + DecisionTreeClassifier(), + ObliqueDecisionTreeClassifier(), + PatchObliqueDecisionTreeClassifier(), + ], +) +@pytest.mark.parametrize("limit", [0.05, 0.1]) +def test_iris(criterion, max_features, honest_prior, estimator, limit): + # Check consistency on dataset iris. + clf = MIGHT( + criterion=criterion, + random_state=0, + max_features=max_features, + n_estimators=10, + honest_prior=honest_prior, + tree_estimator=estimator, + limit=limit, + ) + if honest_prior == "error": + with pytest.raises(ValueError, match="honest_prior error not a valid input."): + clf.statistic(iris_X, iris_y) + else: + score = clf.statistic(iris_X, iris_y, stat="AUC") + assert score == 1.0, "Failed with pAUC: {0} for max fpr: {1}".format(pAUC, limit) From 4ed31f8c4f1f65ac4cf19736b4b6e8c7b6526c31 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 11:35:22 -0400 Subject: [PATCH 10/70] FIX correct variable name --- sktree/stats/tests/test_might.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py index 7eea4c778..fb7949434 100644 --- a/sktree/stats/tests/test_might.py +++ b/sktree/stats/tests/test_might.py @@ -1,7 +1,6 @@ import numpy as np import pytest from sklearn import datasets -from sklearn.utils.estimator_checks import parametrize_with_checks from sktree._lib.sklearn.tree 
import DecisionTreeClassifier from sktree.stats import MIGHT @@ -50,4 +49,4 @@ def test_iris(criterion, max_features, honest_prior, estimator, limit): clf.statistic(iris_X, iris_y) else: score = clf.statistic(iris_X, iris_y, stat="AUC") - assert score == 1.0, "Failed with pAUC: {0} for max fpr: {1}".format(pAUC, limit) + assert score == 1.0, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) From 9114859cd2129e264f8094ad2bdba26f0dfa8f34 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 12 Sep 2023 14:38:00 -0400 Subject: [PATCH 11/70] TST remove patch oblique tree tests --- sktree/stats/tests/test_might.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py index fb7949434..825137217 100644 --- a/sktree/stats/tests/test_might.py +++ b/sktree/stats/tests/test_might.py @@ -29,7 +29,6 @@ None, DecisionTreeClassifier(), ObliqueDecisionTreeClassifier(), - PatchObliqueDecisionTreeClassifier(), ], ) @pytest.mark.parametrize("limit", [0.05, 0.1]) @@ -49,4 +48,4 @@ def test_iris(criterion, max_features, honest_prior, estimator, limit): clf.statistic(iris_X, iris_y) else: score = clf.statistic(iris_X, iris_y, stat="AUC") - assert score == 1.0, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) + assert score >= 0.9, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) From 6b526081eb144ca90946f753baef01ffee0ef609 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 13 Sep 2023 17:29:32 -0400 Subject: [PATCH 12/70] WIP --- sktree/__init__.py | 2 + sktree/ensemble/_honest_forest.py | 2 + sktree/stats/__init__.py | 1 + sktree/stats/forestht.py | 255 ++++++++++++----- sktree/stats/permutationforest.py | 420 ++++++++++++++++++++++++++++ sktree/stats/tests/test_forestht.py | 130 ++++++++- sktree/stats/utils.py | 46 +-- 7 files changed, 764 insertions(+), 92 deletions(-) create mode 100644 sktree/stats/permutationforest.py diff --git a/sktree/__init__.py 
b/sktree/__init__.py index c6af80ea0..8a9e32eb5 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -58,6 +58,7 @@ PatchObliqueRandomForestRegressor, ) from .ensemble._honest_forest import HonestForestClassifier + from .stats import ForestHT except ImportError as e: msg = """Error importing scikit-tree: you cannot import scikit-tree while being in scikit-tree source directory; please exit the scikit-tree source @@ -85,4 +86,5 @@ "ExtraTreesClassifier", "ExtraTreesRegressor", "ExtendedIsolationForest", + "ForestHT", ] diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 1c5edbefb..5ab5eca64 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -5,6 +5,7 @@ import numpy as np from joblib import Parallel, delayed +from sklearn.base import _fit_context from sklearn.ensemble._base import _partition_estimators from sklearn.utils.validation import check_is_fitted, check_X_y @@ -374,6 +375,7 @@ def __init__( self.honest_prior = honest_prior self.tree_estimator = tree_estimator + @_fit_context(prefer_skip_nested_validation=True) def fit(self, X, y, sample_weight=None, classes=None): """ Build a forest of trees from the training set (X, y). 
diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index f6933ba2c..77cdb9372 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1 +1,2 @@ from ._might import MIGHT, MIGHT_MV +from .forestht import ForestHT diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index d8e6d9f54..8fdbddee4 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1,13 +1,13 @@ import numpy as np -from joblib import Parallel, delayed from numpy.typing import ArrayLike from sklearn.base import MetaEstimatorMixin, clone from sklearn.model_selection import train_test_split +from sklearn.utils.validation import _is_fitted, check_X_y -from sktree import HonestForestClassifier from sktree._lib.sklearn.ensemble._forest import ForestClassifier from sktree._lib.sklearn.tree import DecisionTreeClassifier +from ..ensemble import HonestForestClassifier from .utils import METRIC_FUNCTIONS, _compute_null_distribution_coleman, _pvalue @@ -17,7 +17,7 @@ def tree_posterior( y: ArrayLike, covariate_index: ArrayLike = None, test_size: float = 0.2, - seed: int = None, + seed=None, ) -> ArrayLike: """Compute the posterior from each tree on the "OOB" samples. @@ -43,31 +43,32 @@ def tree_posterior( The predicted posterior probabilities for each OOB sample from the tree. For any in-bag samples, the posterior is NaN. """ - rng = np.random.default_rng(seed) + # seed the random number generator using each tree's random seed(?) 
+ rng = np.random.default_rng(tree.random_state) indices = np.arange(X.shape[0]) if covariate_index is not None: # perform permutation of covariates - index_arr = rng.choice(indices, size=X.shape[0], replace=False, shuffle=False) + index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) perm_X_cov = X[index_arr, covariate_index] X[:, covariate_index] = perm_X_cov # XXX: we can replace this using Forest's generator for the in-bag/oob sample indices when # https://github.com/scikit-learn/scikit-learn/pull/26736 is merged - X_train, X_test, y_train, _, _, indices_test = train_test_split( - X, y, indices, test_size=test_size - ) + # X_train, X_test, y_train, _, _, indices_test = train_test_split( + # X, y, indices, test_size=test_size + # ) # individual tree permutation of y labels - tree.fit(X_train, y_train) - y_pred = tree.predict_proba(X_test)[:, 1] + tree.fit(X, y, check_input=False) + # y_pred = tree.predict_proba(X_test)[:, 1] # Fill test set posteriors & set rest NaN - posterior = np.full(y.shape, np.nan) - posterior[indices_test] = y_pred + # posterior = np.full((y.shape[0], tree.n_outputs_), np.nan) + # posterior[indices_test] = y_pred.reshape(-1, tree.n_outputs_) - return posterior + # return posterior class ForestHT(MetaEstimatorMixin): @@ -245,9 +246,6 @@ class ForestHT(MetaEstimatorMixin): Type of decision tree classifier to use. By default `None`, which defaults to :class:`sklearn.tree.DecisionTreeClassifier`. - alpha : float, optional - Rejection threshold, by default 0.05. 
- Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -287,7 +285,6 @@ def __init__( honest_prior="empirical", honest_fraction=0.5, tree_estimator=None, - alpha=0.05, ): self.estimator = HonestForestClassifier( n_estimators=n_estimators, @@ -313,7 +310,6 @@ def __init__( honest_fraction=honest_fraction, tree_estimator=tree_estimator, ) - self.alpha = alpha self.n_jobs = n_jobs self.n_estimators = n_estimators self.criterion = criterion @@ -345,27 +341,58 @@ def _statistic( covariate_index: ArrayLike = None, metric="auc", test_size=0.2, + return_posteriors: bool = False, **metric_kwargs, ): """Helper function to compute the test statistic.""" metric_func = METRIC_FUNCTIONS[metric] + rng = np.random.default_rng(self.random_state) # first run a dummy fit on just two samples to initialize the # internal data structure of the forest - estimator.fit(X[:2], y[:2]) + if not _is_fitted(estimator): + unique_y = np.unique(y) + X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) # Fit each tree and ompute posteriors with train test splits - posterior = Parallel(n_jobs=self.n_jobs)( - delayed(tree_posterior)(tree, X, y, covariate_index, test_size) - for tree in (estimator.estimators_) - ) + n_samples = X.shape[0] + indices = np.arange(n_samples, dtype=int) + posterior_arr = np.zeros((self.n_estimators, n_samples, self.estimator.n_classes_)) + for idx in range(self.n_estimators): + seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) + indices_train, indices_test = train_test_split( + indices, test_size=test_size, stratify=y, shuffle=True, + random_state=seed + ) + tree = estimator.estimators_[idx] + tree_posterior(tree, X[indices_train, :], y[indices_train, :], covariate_index, test_size, seed=seed) + + y_pred = tree.predict_proba(X[indices_test, :]) + + # Fill test set posteriors & set rest NaN + posterior = np.full((y.shape[0], tree.n_classes_), np.nan) + posterior[indices_test, :] = y_pred + posterior_arr[idx, 
...] = posterior # Average all posteriors - posterior_final = np.nanmean(posterior, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1))[0] + posterior_final = np.nanmean(posterior_arr, axis=0) + samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() y_true_final = y[samples, :] posterior_final = posterior_final[samples, :] - stat = metric_func(y_true=y_true_final, y_pred=posterior_final, **metric_kwargs) + if metric == "auc": + if posterior_final.shape[1] != 2: + raise ValueError( + "AUC only supports binary classification. " + "Please use a different metric." + ) + print(posterior_final[:5, :]) + # get posteriors of the positive class + posterior_final = posterior_final[:, 1] + + print('Y true: ', y_true_final) + print('posterior: ', posterior_final) + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) @@ -374,15 +401,20 @@ def _statistic( self.y_true_final_ = y_true_final self.posterior_final_ = posterior_final self.stat_ = stat - else: - if not np.array_equal(samples, self.samples_): - raise ValueError( - "The samples used in the final test are not the same as the " - "samples used in the initial test on the non-permuted samples." - ) + + if return_posteriors: + return stat, posterior_final, samples return stat + def reset(self): + class_attributes = dir(type(self)) + instance_attributes = dir(self) + + for attr_name in instance_attributes: + if attr_name.endswith("_") and attr_name not in class_attributes: + delattr(self, attr_name) + def statistic( self, X: ArrayLike, @@ -390,8 +422,44 @@ def statistic( covariate_index: ArrayLike = None, metric="auc", test_size=0.2, + return_posteriors: bool = False, **metric_kwargs, ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. 
+ y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "auc". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. + samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. ``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. 
+ """ + X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + if covariate_index is None: estimator = self.estimator else: @@ -405,6 +473,7 @@ def statistic( covariate_index=covariate_index, metric=metric, test_size=test_size, + return_posteriors=return_posteriors, **metric_kwargs, ) @@ -416,51 +485,107 @@ def test( metric: str = "auc", test_size: float = 0.2, n_repeats: int = 1000, + return_posteriors: bool = False, **metric_kwargs, ): - # first compute the test statistic on the un-permuted data - observe_stat = self.statistic(X, y, metric=metric, test_size=test_size, **metric_kwargs) - - if self.method == "permutation": - # compute the null distribution by computing a second forest `n_repeats` times using - # permutations of the covariate - null_dist = np.array( - Parallel(n_jobs=self.n_jobs)( - [ - delayed(self.statistic)( - self, X, y, covariate_index, metric, test_size, **metric_kwargs - ) - for _ in range(n_repeats) - ] - ) - ) - elif self.method == "coleman": + """Perform hypothesis test using Coleman method. + + X is split into a training/testing split. Optionally, the covariate index + columns are shuffled. + + On the training dataset, two honest forests are trained and then the posterior + is estimated on the testing dataset. One honest forest is trained on the + permuted dataset and the other is trained on the original dataset. + + Finally, resample the posteriors of the two forests to compute the null + distribution of the statistics. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "auc". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. 
+ n_repeats : int, optional + Number of times to sample the null distribution, by default 1000. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + pval : float + The p-value of the test statistic. + """ + X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data - permute_stat = self.statistic( + observe_stat, observe_posteriors, observe_samples = self.statistic( X, y, - covariate_index=covariate_index, + covariate_index=None, metric=metric, test_size=test_size, + return_posteriors=True, **metric_kwargs, ) + else: + observe_samples = self.samples_ + observe_posteriors = self.posterior_final_ + observe_stat = self.stat_ - # XXX: make sure train/test split before everything; rn there is prolly data leakage - metric_star, metric_star_pi = _compute_null_distribution_coleman( - X, - y, - self.estimator, - self.permuted_estimator_, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) - # metric^\pi - metric - observe_stat = permute_stat - observe_stat + # next permute the data + permute_stat, permute_posteriors, permute_samples = self.statistic( + X, + y, + covariate_index=covariate_index, + metric=metric, + test_size=test_size, + return_posteriors=True, + **metric_kwargs, + ) + + # Note: at this point, both `estimator` and `permuted_estimator_` should + # have been fitted already, so we can now compute on the null by resampling + # the posteriors and computing the test statistic on the resampled posteriors + metric_star, metric_star_pi = _compute_null_distribution_coleman( + X_test=X, + y_test=y, + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + normal_samples=observe_samples, + perm_samples=permute_samples, + metric=metric, + n_repeats=n_repeats, + 
seed=self.random_state, + ) + print(observe_posteriors) + print(permute_posteriors) + # metric^\pi - metric + observe_stat = permute_stat - observe_stat - # metric^\pi_j - metric_j - null_dist = metric_star_pi - metric_star + # metric^\pi_j - metric_j + null_dist = metric_star_pi - metric_star pval = _pvalue(observe_stat=observe_stat, permuted_stat=null_dist, correction=True) + + if return_posteriors: + self.observe_posteriors_ = observe_posteriors + self.permute_posteriors_ = permute_posteriors + self.observe_samples_ = observe_samples + self.permute_samples_ = permute_samples + self.null_dist_ = null_dist return observe_stat, pval + + diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py new file mode 100644 index 000000000..37af8eef2 --- /dev/null +++ b/sktree/stats/permutationforest.py @@ -0,0 +1,420 @@ +import numpy as np +from joblib import Parallel, delayed +from numpy.typing import ArrayLike +from sklearn.base import MetaEstimatorMixin +from sklearn.utils.validation import _is_fitted + +from ..ensemble import HonestForestClassifier +from .utils import METRIC_FUNCTIONS, _pvalue + + +class PermutationForest(MetaEstimatorMixin): + """Hypothesis testing with a permutation forest. + + This implements permutation testing of a null hypothesis using a random forest. + The null hypothesis is generated by permuting ``n_repeats`` times the covariate + indices and then a random forest is trained for each permuted instance. This + is compared to the original random forest that was computed on the regular + non-permuted data. + + Parameters + ---------- + n_estimators : int, default=100 + The number of trees in the forest. + + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + Note: this parameter is tree-specific. 
+ + splitter : {"best", "random"}, default="best" + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. + + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : {"sqrt", "log2", None}, int or float, default="sqrt" + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. 
+ - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + max_leaf_nodes : int, default=None + Grow trees with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. + + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + bootstrap : bool, default=True + Whether bootstrap samples are used when building trees. If False, the + whole dataset is used to build each tree. + + oob_score : bool, default=False + Whether to use out-of-bag samples to estimate the generalization score. + Only available if bootstrap=True. + + n_jobs : int, default=None + The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, + :meth:`decision_path` and :meth:`apply` are all parallelized over the + trees. ``None`` means 1 unless in a `joblib.parallel_backend` + context. ``-1`` means using all processors. See :term:`Glossary + ` for more details. 
+ + random_state : int, RandomState instance or None, default=None + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary ` for details. + + verbose : int, default=0 + Controls the verbosity when fitting and predicting. + + warm_start : bool, default=False + When set to ``True``, reuse the solution of the previous call to fit + and add more estimators to the ensemble, otherwise, just fit a whole + new forest. See :term:`the Glossary `. + + class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ + default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + The "balanced_subsample" mode is the same as "balanced" except that + weights are computed based on the bootstrap sample for every tree + grown. + + For multi-output, the weights of each column of y will be multiplied. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + ccp_alpha : non-negative float, default=0.0 + Complexity parameter used for Minimal Cost-Complexity Pruning. 
The + subtree with the largest cost complexity that is smaller than + ``ccp_alpha`` will be chosen. By default, no pruning is performed. See + :ref:`minimal_cost_complexity_pruning` for details. + + max_samples : int or float, default=None + If bootstrap is True, the number of samples to draw from X + to train each base tree estimator. + + - If None (default), then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0.0, 1.0]`. + + honest_prior : {"ignore", "uniform", "empirical"}, default="empirical" + Method for dealing with empty leaves during evaluation of a test + sample. If "ignore", the tree is ignored. If "uniform", the prior tree + posterior is 1/(number of classes). If "empirical", the prior tree + posterior is the relative class frequency in the voting subsample. + If all trees are ignored, the empirical estimate is returned. + + honest_fraction : float, default=0.5 + Fraction of training samples used for estimates in the trees. The + remaining samples will be used to learn the tree structure. A larger + fraction creates shallower trees with lower variance estimates. + + tree_estimator : object, default=None + Type of decision tree classifier to use. By default `None`, which + defaults to :class:`sklearn.tree.DecisionTreeClassifier`. + + Attributes + ---------- + samples_ : ArrayLike of shape (n_samples,) + The indices of the samples used in the final test. + + y_true_ : ArrayLike of shape (n_samples_final,) + The true labels of the samples used in the final test. + + posterior_ : ArrayLike of shape (n_samples_final, n_outputs) + The predicted posterior probabilities of the samples used in the final test. + + null_dist_ : ArrayLike of shape (n_repeats,) + The null distribution of the test statistic. 
+ + posterior_null_ : ArrayLike of shape (n_samples_final, n_outputs, n_repeats) + The posterior probabilities of the samples used in the final test for each + permutation for the null distribution. + """ + + def __init__( + self, + n_estimators=100, + criterion="gini", + splitter="best", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + class_weight=None, + ccp_alpha=0.0, + max_samples=None, + honest_prior="empirical", + honest_fraction=0.5, + tree_estimator=None, + ): + self.estimator = HonestForestClassifier( + n_estimators=n_estimators, + criterion=criterion, + splitter=splitter, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + class_weight=class_weight, + ccp_alpha=ccp_alpha, + max_samples=max_samples, + honest_prior=honest_prior, + honest_fraction=honest_fraction, + tree_estimator=tree_estimator, + ) + self.n_jobs = n_jobs + self.n_estimators = n_estimators + self.criterion = criterion + self.splitter = splitter + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_features = max_features + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease + self.bootstrap = bootstrap + self.oob_score = oob_score + self.random_state = random_state + self.verbose = verbose + self.warm_start = warm_start + self.class_weight = class_weight + 
self.ccp_alpha = ccp_alpha + self.max_samples = max_samples + self.honest_prior = honest_prior + self.honest_fraction = honest_fraction + self.tree_estimator = tree_estimator + + def statistic( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="auc", + test_size=0.2, + return_posteriors: bool = False, + **metric_kwargs, + ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "auc". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. + samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. ``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. 
+ """ + rng = np.random.default_rng(self.random_state) + metric_func = METRIC_FUNCTIONS[metric] + + # first run a dummy fit on just two samples to initialize the + # internal data structure of the forest + if not _is_fitted(self.estimator): + self.estimator.fit(X[:2], y[:2]) + + # Fit each tree and compute posteriors with train test splits + posterior = Parallel(n_jobs=self.n_jobs)( + delayed(tree_posterior)( + tree, X, y, covariate_index=covariate_index, test_size=test_size + ) + for tree in self.estimator.estimators_ + ) + + # Average all posteriors + posterior_final = np.nanmean(posterior, axis=0) + samples = np.argwhere(~np.isnan(posterior_final).any(axis=1))[0] + y_true_final = y[samples, :] + posterior_final = posterior_final[samples, :] + stat = metric_func(y_true=y_true_final, y_pred=posterior_final, **metric_kwargs) + + # Cache the original test statistic + if covariate_index is None: + self.samples_ = samples + self.y_true_ = y_true_final + self.stat_ = stat + self.posterior_ = posterior_final + + if return_posteriors: + return stat, posterior_final, samples + return stat + + def test( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike, + metric: str = "auc", + test_size: float = 0.2, + n_repeats: int = 1000, + return_posteriors: bool = False, + **metric_kwargs, + ): + """Perform hypothesis test using permutation testing. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike of shape (n_covariates,) + The covariate indices of ``X`` to shuffle. + metric : str, optional + Metric to compute, by default "auc". + test_size : float, optional + Size of the samples to leave out for each tree to compute posteriors on, + by default 0.2. + n_repeats : int, optional + Number of times to sample the null distribution, by default 1000. + + Returns + ------- + observe_stat : float + Observed test statistic. 
+ pvalue : float + p-value of the test. + """ + # compute original test statistic + observe_stat = self.statistic(X, y, covariate_index, metric, test_size, **metric_kwargs) + + # compute null distribution of the test statistic + # WARNING: this could take a long time, since it fits a new forest + null_dist = np.array( + Parallel(n_jobs=self.n_jobs)( + [ + delayed(self.statistic)( + self, + X, + y, + covariate_index, + metric, + test_size, + return_posteriors, + **metric_kwargs, + ) + for _ in range(n_repeats) + ] + ) + ) + if not return_posteriors: + self.null_dist_ = null_dist + else: + self.null_dist_ = null_dist[0] + self.posterior_null_ = null_dist[1] + + pvalue = _pvalue(observe_stat=observe_stat, permuted_stat=null_dist, correction=True) + return observe_stat, pvalue diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 0cb36bc26..7a45cd19c 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,6 +1,26 @@ import numpy as np - +from scipy.special import expit +import pytest from sktree.stats.forestht import ForestHT +from sklearn import datasets + +from sktree._lib.sklearn.tree import DecisionTreeClassifier +from sktree.stats import MIGHT +from sktree.tree import ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier + +# load the iris dataset +# and randomly permute it +iris = datasets.load_iris() +rng = np.random.RandomState(1) + +# remove third class +iris_X = iris.data[iris.target != 2] +iris_y = iris.target[iris.target != 2] + +p = rng.permutation(iris_X.shape[0]) +iris_X = iris_X[p] +iris_y = iris_y[p] + seed = 12345 @@ -10,16 +30,18 @@ def test_iris(): def test_linear_model(): - """Test MIGHT using MSE from linear model simulation. + r"""Test MIGHT using MSE from linear model simulation. See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
- Y = Beta * X_1 + Beta * I(X_6 = 2) + epsilon + Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon """ + # TODO: this requires us to implement the test using forestregressors + pass j = np.linspace(0.005, 2.25, 9)[0] beta = 10 sigma = 10 / j - n_samples = 2000 + n_samples = 20 n_estimators = 125 # subsample_size = np.power(n_samples, 0.6) @@ -27,7 +49,11 @@ def test_linear_model(): # sample covariates X_15 = rng.uniform(0, 1, size=(n_samples, 5)) - X_610 = rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples, 5)) + X_610 = np.zeros((n_samples, 5)) + for idx in range(5): + X_610[:, idx] = np.argwhere( + rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) + )[:, 1] X = np.concatenate((X_15, X_610), axis=1) # sample noise @@ -53,11 +79,97 @@ def test_linear_model(): assert pvalue > 0.05 -def test_mars_model(): - """Test MIGHT using MSE from linear model simulation. +def test_correlated_logit_model(): + r"""Test MIGHT using MSE from linear model simulation. See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
- Y = Beta * X_1 + Beta * I(X_6 = 2) + epsilon + P(Y = 1 | X) = expit(beta * \\sum_{j=2}^5 X_j) """ - pass + beta = 15.0 + n_samples = 600 + n_estimators = 125 + n_jobs = -1 + + n = 100 # Number of time steps + ar_coefficient = 0.015 + rng = np.random.default_rng(seed) + test_size = 0.5 + + X = np.zeros((n_samples, n)) + for idx in range(n_samples): + # sample covariates + white_noise = rng.standard_normal(size=n) + + # Create an array to store the simulated AR(1) time series + ar1_series = np.zeros(n) + ar1_series[0] = white_noise[0] + + # Simulate the AR(1) process + for t in range(1, n): + ar1_series[t] = ar_coefficient * ar1_series[t - 1] + white_noise[t] + + X[idx, :] = ar1_series + + # now compute the output labels + y_proba = expit(beta * X[:, 1:5].sum(axis=1)) + assert y_proba.shape == (n_samples,) + y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) + + est = ForestHT(max_features=n, random_state=seed, n_estimators=n_estimators, n_jobs=n_jobs) + + # test for X_2 important + stat, pvalue = est.test(X, y, [1], test_size=test_size, metric="mse") + print(pvalue) + assert pvalue < 0.6, f"pvalue: {pvalue}" + + # test for X_1 + stat, pvalue = est.test(X, y, [0], metric="mse") + print(pvalue) + assert pvalue > 0.9, f"pvalue: {pvalue}" + + # test for X_100 + stat, pvalue = est.test(X, y, [n - 1], metric="mse") + print(pvalue) + assert pvalue > 0.9, f"pvalue: {pvalue}" + + +@pytest.mark.parametrize("criterion", ["gini", "entropy"]) +@pytest.mark.parametrize("max_features", [None, 'sqrt']) +@pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) +@pytest.mark.parametrize( + "estimator", + [ + None, + DecisionTreeClassifier(), + ObliqueDecisionTreeClassifier(), + ], +) +@pytest.mark.parametrize("limit", [0.05, 0.1]) +def test_iris_pauc(criterion, max_features, honest_prior, estimator, limit): + # Check consistency on dataset iris. 
+ clf = ForestHT( + criterion=criterion, + random_state=0, + max_features=max_features, + n_estimators=100, + honest_prior=honest_prior, + tree_estimator=estimator, + ) + score = clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) + assert score >= 0.9, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) + + # now add completely uninformative feature + X = np.hstack((iris_X, rng.standard_normal(size=(iris_X.shape[0], 1)))) + + # test for unimportant feature + test_size = 0.2 + clf.reset() + stat, pvalue = clf.test(X, iris_y, [X.shape[1] - 1], test_size=test_size, metric="auc") + print(pvalue) + # assert pvalue > 0.05, f"pvalue: {pvalue}" + + stat, pvalue = clf.test(X, iris_y, [2, 3], test_size=test_size, metric="auc") + print(pvalue) + # assert pvalue < 0.05, f"pvalue: {pvalue}" + assert False \ No newline at end of file diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 9bc196d36..baa3e1c42 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -4,7 +4,7 @@ from numpy.typing import ArrayLike from scipy.stats import entropy from sklearn.metrics import mean_squared_error, roc_auc_score -from sklearn.utils.validation import check_is_fitted, check_X_y +from sklearn.utils.validation import check_X_y from sktree._lib.sklearn.ensemble._forest import ForestClassifier @@ -107,8 +107,10 @@ def compute_null_distribution_perm( def _compute_null_distribution_coleman( X_test: ArrayLike, y_test: ArrayLike, - forest: ForestClassifier, - perm_forest: ForestClassifier, + y_pred_proba_normal: ArrayLike, + y_pred_proba_perm: ArrayLike, + normal_samples: ArrayLike, + perm_samples: ArrayLike, metric: str = "mse", n_repeats: int = 1000, seed: int = None, @@ -123,10 +125,14 @@ def _compute_null_distribution_coleman( The data matrix. y_test : ArrayLike of shape (n_samples, n_outputs) The output matrix. - forest : ForestClassifier - A trained forest on the original data. - perm_forest : ForestClassifier - A trained forest on the permuted data. 
+ y_pred_proba_normal : ArrayLike of shape (n_samples_normal, n_outputs) + The predicted posteriors from the normal forest. + y_pred_proba_perm : ArrayLike of shape (n_samples_perm, n_outputs) + The predicted posteriors from the permuted forest. + normal_samples : ArrayLike of shape (n_samples_normal,) + The indices of the normal samples that we have a posterior for. + perm_samples : ArrayLike of shape (n_samples_perm,) + The indices of the permuted samples that we have a posterior for. metric : str, optional The metric, which to compute the null distribution of statistics, by default 'mse'. n_repeats : int, optional @@ -142,33 +148,37 @@ def _compute_null_distribution_coleman( An array of the metrics computed on the other half of the trees. """ rng = np.random.default_rng(seed) - check_is_fitted(forest) - check_is_fitted(perm_forest) - X_test, y_test = check_X_y(X_test, y_test, ensure_2d=True) + # X_test, y_test = check_X_y(X_test, y_test, copy=True, ensure_2d=True, multi_output=True) metric_func = METRIC_FUNCTIONS[metric] # sample two sets of equal number of trees from the combined forest - y_pred_proba_normal = forest.predict_proba(X_test) - y_pred_proba_perm = perm_forest.predict_proba(X_test) all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0) - n_samples = len(y_test) + # get the indices of the samples that we have a posterior for, so each element + # is an index into `y_test` + all_samples_pred = np.concatenate((normal_samples, perm_samples), axis=0) + + n_samples_final = len(all_samples_pred) # pre-allocate memory for the index array - index_arr = np.arange(n_samples * 2, dtype=int) + index_arr = np.arange(n_samples_final, dtype=int) metric_star = np.zeros((n_repeats,)) metric_star_pi = np.zeros((n_repeats,)) for idx in range(n_repeats): # two sets of random indices from 1 : 2N are sampled using Fisher-Yates rng.shuffle(index_arr) - first_half_index = index_arr[:n_samples] - second_half_index = index_arr[n_samples:] + 
first_half_index = index_arr[: n_samples_final // 2] + second_half_index = index_arr[n_samples_final // 2 :] + + # now get the pointers to the actual samples used for the metric + y_test_first_half = y_test[all_samples_pred[first_half_index]] + y_test_second_half = y_test[all_samples_pred[second_half_index]] # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_true=y_test, y_pred=all_y_pred[first_half_index]) - second_half_metric = metric_func(y_true=y_test, y_pred=all_y_pred[second_half_index]) + first_half_metric = metric_func(y_test_first_half, all_y_pred[first_half_index]) + second_half_metric = metric_func(y_test_second_half, all_y_pred[second_half_index]) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric From 1a5ebe32ba3e8803540078957ad0ec9e18e5cd07 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 13 Sep 2023 22:32:56 -0400 Subject: [PATCH 13/70] Linear model not working? Signed-off-by: Adam Li --- benchmarks_nonasv/bench_forestht.py | 22 ++ sktree/stats/__init__.py | 2 +- sktree/stats/forestht.py | 583 +++++++++++++++++++++++++++- sktree/stats/tests/test_forestht.py | 65 ++-- sktree/stats/utils.py | 10 +- 5 files changed, 644 insertions(+), 38 deletions(-) create mode 100644 benchmarks_nonasv/bench_forestht.py diff --git a/benchmarks_nonasv/bench_forestht.py b/benchmarks_nonasv/bench_forestht.py new file mode 100644 index 000000000..88207b309 --- /dev/null +++ b/benchmarks_nonasv/bench_forestht.py @@ -0,0 +1,22 @@ +"""The following functions reproduce the results from the paper, :footcite:`coleman2022scalable`. + +Specifically, the simulations for model 1, 2, 3 and 4 are reproduced. + +.. note:: This script will take a long time to run, since a power curve is generated. 
+""" + +def linear_model_ancova(): + pass + +def linear_model_mars(): + pass + +def correlated_logit_model(): + pass + + +def random_forest_model(): + pass + +if __name__ == "__main__": + pass diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index 77cdb9372..da54ef020 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,2 +1,2 @@ from ._might import MIGHT, MIGHT_MV -from .forestht import ForestHT +from .forestht import ForestHT, HyppoForestRegressor diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 8fdbddee4..3b8317710 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -4,11 +4,50 @@ from sklearn.model_selection import train_test_split from sklearn.utils.validation import _is_fitted, check_X_y -from sktree._lib.sklearn.ensemble._forest import ForestClassifier -from sktree._lib.sklearn.tree import DecisionTreeClassifier +from sktree._lib.sklearn.ensemble._forest import ( + ForestClassifier, + ForestRegressor, + RandomForestRegressor, +) +from sktree._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from ..ensemble import HonestForestClassifier -from .utils import METRIC_FUNCTIONS, _compute_null_distribution_coleman, _pvalue +from .utils import METRIC_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, _pvalue + + +def train_tree( + tree: DecisionTreeClassifier, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, +) -> ArrayLike: + """Compute the posterior from each tree on the "OOB" samples. + + Parameters + ---------- + tree : DecisionTreeClassifier + The tree to compute the posterior from. + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The output matrix. + covariate_index : ArrayLike of shape (n_covariates,), optional + The indices of the covariates to permute, by default None, which + does not permute any columns. 
+ """ + # seed the random number generator using each tree's random seed(?) + rng = np.random.default_rng(tree.random_state) + + indices = np.arange(X.shape[0]) + + if covariate_index is not None: + # perform permutation of covariates + index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) + perm_X_cov = X[index_arr, covariate_index] + X[:, covariate_index] = perm_X_cov + + # individual tree permutation of y labels + tree.fit(X, y, check_input=False) def tree_posterior( @@ -362,11 +401,17 @@ def _statistic( for idx in range(self.n_estimators): seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) indices_train, indices_test = train_test_split( - indices, test_size=test_size, stratify=y, shuffle=True, - random_state=seed + indices, test_size=test_size, stratify=y, shuffle=True, random_state=seed ) tree = estimator.estimators_[idx] - tree_posterior(tree, X[indices_train, :], y[indices_train, :], covariate_index, test_size, seed=seed) + tree_posterior( + tree, + X[indices_train, :], + y[indices_train, :], + covariate_index, + test_size, + seed=seed, + ) y_pred = tree.predict_proba(X[indices_test, :]) @@ -383,15 +428,14 @@ def _statistic( if metric == "auc": if posterior_final.shape[1] != 2: raise ValueError( - "AUC only supports binary classification. " - "Please use a different metric." + "AUC only supports binary classification. " "Please use a different metric." 
) print(posterior_final[:5, :]) # get posteriors of the positive class posterior_final = posterior_final[:, 1] - - print('Y true: ', y_true_final) - print('posterior: ', posterior_final) + + print("Y true: ", y_true_final) + print("posterior: ", posterior_final) stat = metric_func(y_true_final, posterior_final, **metric_kwargs) if covariate_index is None: @@ -459,7 +503,7 @@ def statistic( X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) if y.ndim != 2: y = y.reshape(-1, 1) - + if covariate_index is None: estimator = self.estimator else: @@ -589,3 +633,518 @@ def test( return observe_stat, pval +class HyppoForestRegressor(MetaEstimatorMixin): + """Forest hypothesis testing with continuous `y` variable. + + Parameters + ---------- + n_estimators : int, default=100 + The number of trees in the forest. + + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + Note: this parameter is tree-specific. + + splitter : {"best", "random"}, default="best" + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. + + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. 
+ A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : {"sqrt", "log2", None}, int or float, default="sqrt" + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + max_leaf_nodes : int, default=None + Grow trees with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. 
+ + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + bootstrap : bool, default=False + Whether bootstrap samples are used when building trees. If False, the + whole dataset is used to build each tree. + + oob_score : bool, default=False + Whether to use out-of-bag samples to estimate the generalization score. + Only available if bootstrap=True. + + n_jobs : int, default=None + The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, + :meth:`decision_path` and :meth:`apply` are all parallelized over the + trees. ``None`` means 1 unless in a `joblib.parallel_backend` + context. ``-1`` means using all processors. See :term:`Glossary + <n_jobs>` for more details. + + random_state : int, RandomState instance or None, default=None + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary <random_state>` for details. + + verbose : int, default=0 + Controls the verbosity when fitting and predicting. + + warm_start : bool, default=False + When set to ``True``, reuse the solution of the previous call to fit + and add more estimators to the ensemble, otherwise, just fit a whole + new forest. See :term:`the Glossary <warm_start>`. + + class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ + default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one.
For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + The "balanced_subsample" mode is the same as "balanced" except that + weights are computed based on the bootstrap sample for every tree + grown. + + For multi-output, the weights of each column of y will be multiplied. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + ccp_alpha : non-negative float, default=0.0 + Complexity parameter used for Minimal Cost-Complexity Pruning. The + subtree with the largest cost complexity that is smaller than + ``ccp_alpha`` will be chosen. By default, no pruning is performed. See + :ref:`minimal_cost_complexity_pruning` for details. + + max_samples : int or float, default=None + If bootstrap is True, the number of samples to draw from X + to train each base tree estimator. + + - If None (default), then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0.0, 1.0]`. + + Attributes + ---------- + samples_ : ArrayLike of shape (n_samples,) + The indices of the samples used in the final test. + + y_true_final_ : ArrayLike of shape (n_samples_final,) + The true labels of the samples used in the final test. 
+ + posterior_final_ : ArrayLike of shape (n_samples_final,) + The predicted posterior probabilities of the samples used in the final test. + + null_dist_ : ArrayLike of shape (n_repeats,) + The null distribution of the test statistic. + """ + + def __init__( + self, + estimator=None, + n_estimators=100, + criterion="squared_error", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None, + permute_per_tree=True, + ): + self.estimator = estimator + self.n_jobs = n_jobs + self.n_estimators = n_estimators + self.criterion = criterion + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_features = max_features + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease + self.bootstrap = bootstrap + self.oob_score = oob_score + self.random_state = random_state + self.verbose = verbose + self.warm_start = warm_start + self.ccp_alpha = ccp_alpha + self.max_samples = max_samples + self.permute_per_tree = permute_per_tree + + def _statistic( + self, + estimator: ForestClassifier, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + test_size=0.2, + return_posteriors: bool = False, + **metric_kwargs, + ): + """Helper function to compute the test statistic.""" + metric_func = METRIC_FUNCTIONS[metric] + rng = np.random.default_rng(self.random_state) + n_samples = X.shape[0] + indices = np.arange(n_samples, dtype=int) + + if self.permute_per_tree: + # first run a dummy fit on the samples to initialize the + # internal data structure of the forest + if not _is_fitted(estimator): + unique_y = np.unique(y) + X_dummy = 
np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) + + # Fit each tree and compute posteriors with train test splits + posterior_arr = np.zeros((self.n_estimators, n_samples, estimator.n_outputs_)) + for idx in range(self.n_estimators): + seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) + indices_train, indices_test = train_test_split( + indices, test_size=test_size, stratify=y, shuffle=True, random_state=seed + ) + tree: DecisionTreeRegressor = estimator.estimators_[idx] + train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) + + y_pred = tree.predict(X[indices_test, :]) + + # Fill test set posteriors & set rest NaN + posterior = np.full((y.shape[0], tree.n_outputs_), np.nan) + posterior[indices_test, :] = y_pred + posterior_arr[idx, ...] = posterior + + # Average all posteriors + posterior_final = np.nanmean(posterior_arr, axis=0) + samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() + y_true_final = y[samples, :] + posterior_final = posterior_final[samples, :] + else: + if covariate_index is not None: + print("Permuting the covariate...") + # perform permutation of covariates + index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) + X[:, covariate_index] = X[index_arr, covariate_index] + + X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] + y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] + + estimator.fit(X_train, y_train) + y_pred = estimator.predict(X_test) + + # set variables to compute metric + samples = self.indices_test_ + y_true_final = y_test + posterior_final = y_pred + + # print('Y true: ', y_true_final) + # print('posterior: ', posterior_final) + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) + + if covariate_index is None: + # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) + # arrays of y and predicted posterior + self.samples_ = samples + 
self.y_true_final_ = y_true_final + self.posterior_final_ = posterior_final + self.stat_ = stat + + if return_posteriors: + return stat, posterior_final, samples + + return stat + + def reset(self): + class_attributes = dir(type(self)) + instance_attributes = dir(self) + + for attr_name in instance_attributes: + if attr_name.endswith("_") and attr_name not in class_attributes: + delattr(self, attr_name) + + def statistic( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + test_size=0.2, + return_posteriors: bool = False, + check_input: bool = True, + **metric_kwargs, + ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "auc". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. + samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. ``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. 
+ """ + if check_input: + X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + if metric not in REGRESSOR_METRICS: + raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') + + if not hasattr(self, "estimator_") and self.estimator is None: + self.estimator_ = RandomForestRegressor( + n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + bootstrap=self.bootstrap, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose, + warm_start=self.warm_start, + ccp_alpha=self.ccp_alpha, + max_samples=self.max_samples, + ) + elif not isinstance(self.estimator_, ForestRegressor): + raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") + + if covariate_index is None: + estimator = self.estimator_ + else: + self.permuted_estimator_ = clone(self.estimator_) + estimator = self.permuted_estimator_ + + return self._statistic( + estimator, + X, + y, + covariate_index=covariate_index, + metric=metric, + test_size=test_size, + return_posteriors=return_posteriors, + **metric_kwargs, + ) + + def test( + self, + X, + y, + covariate_index: ArrayLike, + metric: str = "mse", + test_size: float = 0.2, + n_repeats: int = 1000, + return_posteriors: bool = False, + **metric_kwargs, + ): + """Perform hypothesis test using Coleman method. + + X is split into a training/testing split. Optionally, the covariate index + columns are shuffled. + + On the training dataset, two honest forests are trained and then the posterior + is estimated on the testing dataset. 
One honest forest is trained on the + permuted dataset and the other is trained on the original dataset. + + Finally, resample the posteriors of the two forests to compute the null + distribution of the statistics. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "mse". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + n_repeats : int, optional + Number of times to sample the null distribution, by default 1000. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + pval : float + The p-value of the test statistic. + """ + X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + indices = np.arange(X.shape[0]) + if not self.permute_per_tree: + # train/test split + # XXX: could add stratifying by y when y is classification + indices_train, indices_test = train_test_split( + indices, test_size=test_size, shuffle=True + ) + self.indices_train_ = indices_train + self.indices_test_ = indices_test + + if not hasattr(self, "samples_"): + # first compute the test statistic on the un-permuted data + observe_stat, observe_posteriors, observe_samples = self.statistic( + X, + y, + covariate_index=None, + metric=metric, + test_size=test_size, + return_posteriors=True, + check_input=False, + **metric_kwargs, + ) + else: + observe_samples = self.samples_ + observe_posteriors = self.posterior_final_ + observe_stat = self.stat_ + + # next permute the data + permute_stat, permute_posteriors, permute_samples = self.statistic( + X, + y, + covariate_index=covariate_index, + 
metric=metric, + test_size=test_size, + return_posteriors=True, + check_input=False, + **metric_kwargs, + ) + + # Note: at this point, both `estimator` and `permuted_estimator_` should + # have been fitted already, so we can now compute on the null by resampling + # the posteriors and computing the test statistic on the resampled posteriors + metric_star, metric_star_pi = _compute_null_distribution_coleman( + X_test=X, + y_test=y, + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + normal_samples=observe_samples, + perm_samples=permute_samples, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + # print(observe_posteriors) + # print(permute_posteriors) + # metric^\pi - metric + observe_stat = permute_stat - observe_stat + + # metric^\pi_j - metric_j + null_dist = metric_star_pi - metric_star + + pval = _pvalue(observe_stat=observe_stat, permuted_stat=null_dist, correction=True) + + if return_posteriors: + self.observe_posteriors_ = observe_posteriors + self.permute_posteriors_ = permute_posteriors + self.observe_samples_ = observe_samples + self.permute_samples_ = permute_samples + + self.null_dist_ = null_dist + return observe_stat, pval diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 7a45cd19c..3c4e644eb 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,11 +1,11 @@ import numpy as np -from scipy.special import expit import pytest -from sktree.stats.forestht import ForestHT +from scipy.special import expit from sklearn import datasets from sktree._lib.sklearn.tree import DecisionTreeClassifier from sktree.stats import MIGHT +from sktree.stats.forestht import ForestHT, HyppoForestRegressor from sktree.tree import ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier # load the iris dataset @@ -25,6 +25,14 @@ seed = 12345 +def test_forestht_proper_attributes(): + """Forest HTs should have n_classes_ and 
n_outputs_ properly set. + + This requires the first dummy fit to always get all classes. + """ + pass + + def test_iris(): pass @@ -36,14 +44,13 @@ def test_linear_model(): Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon """ - # TODO: this requires us to implement the test using forestregressors - pass - j = np.linspace(0.005, 2.25, 9)[0] - beta = 10 - sigma = 10 / j - n_samples = 20 + # j = np.linspace(0.005, 2.25, 9)[] + beta = 10.0 + sigma = 0.05 # / j + n_samples = 2500 n_estimators = 125 - # subsample_size = np.power(n_samples, 0.6) + test_size = 0.1 + # subsample_size = 0.8 rng = np.random.default_rng(seed) @@ -54,29 +61,41 @@ def test_linear_model(): X_610[:, idx] = np.argwhere( rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) )[:, 1] - X = np.concatenate((X_15, X_610), axis=1) + X = np.concatenate((X_15, X_610), axis=1, dtype=np.float32) + assert X.shape == (n_samples, 10) # sample noise - epsilon = rng.normal(size=n_samples, scale=sigma) + epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma) # compute final y of (n_samples,) - y = beta * X[:, 0] + beta * (X[:, 5] == 2) + epsilon - - est = ForestHT(random_state=seed, n_estimators=n_estimators) + y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon + est = HyppoForestRegressor( + max_features=1.0, + random_state=seed, + n_estimators=n_estimators, + n_jobs=-1, + permute_per_tree=False, + # bootstrap=True, max_samples=subsample_size + ) # test for X_1 - stat, pvalue = est.test(X, y, [0]) - assert pvalue < 0.05 + stat, pvalue = est.test(X, y, [0], test_size=test_size) + print(pvalue) + # assert pvalue < 0.05, f"pvalue: {pvalue}" # test for X_6 - stat, pvalue = est.test(X, y, [5]) - assert pvalue < 0.05 + stat, pvalue = est.test(X, y, [5], test_size=test_size) + print(pvalue) + # assert pvalue < 0.05, f"pvalue: {pvalue}" # test for a few unimportant other X - for covariate_index in [1, 2, 3]: + for covariate_index in [1, 6]: # test for X_2, X_3, X_4 - stat, pvalue = est.test(X, y, 
[covariate_index]) - assert pvalue > 0.05 + stat, pvalue = est.test(X, y, [covariate_index], test_size=test_size) + print(pvalue) + # assert pvalue > 0.05, f"pvalue: {pvalue}" + + assert False def test_correlated_logit_model(): @@ -135,7 +154,7 @@ def test_correlated_logit_model(): @pytest.mark.parametrize("criterion", ["gini", "entropy"]) -@pytest.mark.parametrize("max_features", [None, 'sqrt']) +@pytest.mark.parametrize("max_features", [None, "sqrt"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) @pytest.mark.parametrize( "estimator", @@ -172,4 +191,4 @@ def test_iris_pauc(criterion, max_features, honest_prior, estimator, limit): stat, pvalue = clf.test(X, iris_y, [2, 3], test_size=test_size, metric="auc") print(pvalue) # assert pvalue < 0.05, f"pvalue: {pvalue}" - assert False \ No newline at end of file + assert False diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index baa3e1c42..94754bfb0 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -3,7 +3,7 @@ import numpy as np from numpy.typing import ArrayLike from scipy.stats import entropy -from sklearn.metrics import mean_squared_error, roc_auc_score +from sklearn.metrics import mean_absolute_error, mean_squared_error, roc_auc_score from sklearn.utils.validation import check_X_y from sktree._lib.sklearn.ensemble._forest import ForestClassifier @@ -16,7 +16,13 @@ def _mutual_information(y_true, y_pred): return max(H_Y - H_YX, 0) -METRIC_FUNCTIONS = {"mse": mean_squared_error, "auc": roc_auc_score, "mi": _mutual_information} +METRIC_FUNCTIONS = { + "mse": mean_squared_error, + "mae": mean_absolute_error, + "auc": roc_auc_score, + "mi": _mutual_information, +} +REGRESSOR_METRICS = ("mse", "mae") def _pvalue(observe_stat: float, permuted_stat: ArrayLike, correction: bool = True) -> float: From d8da658b8395b9528b2640d6319abb72678926c8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 15 Sep 2023 11:19:40 -0400 Subject: [PATCH 14/70] Adding permutation forest 
that seems to work Signed-off-by: Adam Li --- README.md | 12 +- benchmarks_nonasv/bench_forestht.py | 155 +++- .../test_permutation_forest.ipynb | 385 +++++++++ sktree/conftest.py | 2 + sktree/meson.build | 1 + sktree/stats/__init__.py | 3 +- sktree/stats/forestht.py | 52 +- sktree/stats/meson.build | 1 + sktree/stats/permutationforest.py | 763 ++++++++++++++---- sktree/stats/tests/test_forestht.py | 104 +-- sktree/stats/utils.py | 81 +- sktree/tree/_classes.py | 2 +- 12 files changed, 1275 insertions(+), 286 deletions(-) create mode 100644 benchmarks_nonasv/test_permutation_forest.ipynb create mode 100644 sktree/conftest.py diff --git a/README.md b/README.md index 470641ef1..36e39fd02 100644 --- a/README.md +++ b/README.md @@ -13,11 +13,6 @@ scikit-tree is a scikit-learn compatible API for building state-of-the-art decis Tree-models have withstood the test of time, and are consistently used for modern-day data science and machine learning applications. They especially perform well when there are limited samples for a problem and are flexible learners that can be applied to a wide variety of different settings, such as tabular, images, time-series, genomics, EEG data and more. -We welcome contributions for modern tree-based algorithms. We use Cython to achieve fast C/C++ speeds, while abiding by a scikit-learn compatible (tested) API. Moreover, our Cython internals are easily extensible because they follow the internal Cython API of scikit-learn as well. - -Due to the current state of scikit-learn's internal Cython code for trees, we have to instead leverage a fork of scikit-learn at https://github.com/neurodata/scikit-learn when -extending the decision tree model API of scikit-learn. Specifically, we extend the Python and Cython API of the tree submodule in scikit-learn in our submodule, so we can introduce the tree models housed in this package. 
Thus these extend the functionality of decision-tree based models in a way that is not possible yet in scikit-learn itself. As one example, we introduce an abstract API to allow users to implement their own oblique splits. Our plan in the future is to benchmark these functionalities and introduce them upstream to scikit-learn where applicable and inclusion criterion are met. - Documentation ============= @@ -105,6 +100,13 @@ Alternatively, you can use editable installs pip install --no-build-isolation --editable . +Development +=========== +We welcome contributions for modern tree-based algorithms. We use Cython to achieve fast C/C++ speeds, while abiding by a scikit-learn compatible (tested) API. Moreover, our Cython internals are easily extensible because they follow the internal Cython API of scikit-learn as well. + +Due to the current state of scikit-learn's internal Cython code for trees, we have to instead leverage a fork of scikit-learn at https://github.com/neurodata/scikit-learn when +extending the decision tree model API of scikit-learn. Specifically, we extend the Python and Cython API of the tree submodule in scikit-learn in our submodule, so we can introduce the tree models housed in this package. Thus these extend the functionality of decision-tree based models in a way that is not possible yet in scikit-learn itself. As one example, we introduce an abstract API to allow users to implement their own oblique splits. Our plan in the future is to benchmark these functionalities and introduce them upstream to scikit-learn where applicable and inclusion criterion are met. + References ========== [1]: [`Li, Adam, et al. 
"Manifold Oblique Random Forests: Towards Closing the Gap on Convolutional Deep Networks" SIAM Journal on Mathematics of Data Science, 5(1), 77-96, 2023`](https://doi.org/10.1137/21M1449117) diff --git a/benchmarks_nonasv/bench_forestht.py b/benchmarks_nonasv/bench_forestht.py index 88207b309..c18bb9e8b 100644 --- a/benchmarks_nonasv/bench_forestht.py +++ b/benchmarks_nonasv/bench_forestht.py @@ -4,19 +4,164 @@ .. note:: This script will take a long time to run, since a power curve is generated. """ +from collections import defaultdict + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import seaborn as sns +from scipy.special import expit + +from sktree.stats import PermutationForestClassifier, PermutationForestRegressor + +seed = 12345 + + +def linear_model_ancova(sigma_factor=2.0, seed=None): + r"""Test MIGHT using MSE from linear model simulation. + + See https://arxiv.org/pdf/1904.07830.pdf Figure 1. + + Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon + """ + beta = 10.0 + sigma = 10.0 / sigma_factor + n_samples = 2200 + n_estimators = 125 + test_size = 0.1 + + rng = np.random.default_rng(seed) + + # sample covariates + X_15 = rng.uniform(0, 1, size=(n_samples, 5)) + X_610 = np.zeros((n_samples, 5)) + for idx in range(5): + buff = np.argwhere(rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=n_samples))[:, 1] + + X_610[:, idx] = buff + + X = np.concatenate((X_15, X_610), axis=1) + assert X_15.shape == (n_samples, 5) + assert X_610.shape == (n_samples, 5) + assert X.shape == (n_samples, 10) + + # sample noise + epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma) + + # compute final y of (n_samples,) + y = beta * X[:, 0] + (beta * (X[:, 5] - 2)) + epsilon + + # initialize hypothesis tester + est = PermutationForestRegressor( + max_features=1.0, + random_state=seed, + n_estimators=n_estimators, + n_jobs=-1, + # bootstrap=True, + # max_samples=subsample_size + ) + pvalue_dict = {} + + # test for X_1 + stat, pvalue = 
est.test(X.copy(), y.copy(), [0], n_repeats=100, test_size=test_size) + print("X1: ", pvalue) + pvalue_dict["X1"] = pvalue + # assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for X_6 + stat, pvalue = est.test(X.copy(), y.copy(), [5], n_repeats=100, test_size=test_size) + print("X6: ", pvalue) + pvalue_dict["X6"] = pvalue + # assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for a few unimportant other X + for name, covariate_index in zip(["X2", "X7"], [1, 6]): + # test for X_2, X_7 + stat, pvalue = est.test( + X.copy(), y.copy(), [covariate_index], n_repeats=100, test_size=test_size + ) + print("X2/7: ", pvalue) + pvalue_dict[name] = pvalue + # assert pvalue > 0.05, f"pvalue: {pvalue}" + + return pvalue_dict -def linear_model_ancova(): - pass def linear_model_mars(): pass -def correlated_logit_model(): - pass + +def correlated_logit_model(beta=5.0, seed=None): + n_samples = 600 + n_estimators = 125 + n_jobs = -1 + max_features = 1.0 + test_size = 1.0 / 6 + metric = "mse" + n_repeats = 100 + + n = 500 # Number of time steps + ar_coefficient = 0.15 + + rng = np.random.default_rng(seed) + + X = np.zeros((n_samples, n)) + for idx in range(n_samples): + # sample covariates + white_noise = rng.standard_normal(size=n) + + # Create an array to store the simulated AR(1) time series + ar1_series = np.zeros(n) + ar1_series[0] = white_noise[0] + + # Simulate the AR(1) process + for t in range(1, n): + ar1_series[t] = ar_coefficient * ar1_series[t - 1] + white_noise[t] + + X[idx, :] = ar1_series + + # now compute the output labels + y_proba = expit(beta * X[:, 1:5].sum(axis=1)) + assert y_proba.shape == (n_samples,) + y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) + + est = PermutationForestClassifier( + max_features=max_features, random_state=seed, n_estimators=n_estimators, n_jobs=n_jobs + ) + + # test for X_2 important + stat, pvalue = est.test(X, y, [1], test_size=test_size, n_repeats=n_repeats, metric=metric) + print("X2: ", pvalue) + assert pvalue 
< 0.05, f"pvalue: {pvalue}" + + # test for X_1 unimportant + stat, pvalue = est.test(X, y, [0], test_size=test_size, n_repeats=n_repeats, metric=metric) + print("X1: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" + + # test for X_500 unimportant + stat, pvalue = est.test(X, y, [n - 1], test_size=test_size, n_repeats=n_repeats, metric=metric) + print("X500: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" def random_forest_model(): pass + if __name__ == "__main__": - pass + pvalue_dict = defaultdict(list) + rng = np.random.default_rng(seed) + + j_space = np.linspace(0.005, 2.25, 9) + + for sigma_factor in j_space: + for idx in range(10): + new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) + + elements_dict = linear_model_ancova(sigma_factor, new_seed) + for key, value in elements_dict.items(): + pvalue_dict[key].append(value) + pvalue_dict["sigma_factor"].append(sigma_factor) + + df = pd.DataFrame(pvalue_dict) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb new file mode 100644 index 000000000..6cd8626d7 --- /dev/null +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "\n", + "from sktree.stats import HyppoForestRegressor, PermutationForest\n", + "\n", + "seed = 12345" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", + "metadata": {}, + "outputs": [], + "source": 
[ + "def linear_model_ancova(sigma_factor=2.0, seed=None):\n", + " r\"\"\"Test MIGHT using MSE from linear model simulation.\n", + "\n", + " See https://arxiv.org/pdf/1904.07830.pdf Figure 1.\n", + "\n", + " Y = Beta * X_1 + Beta * I(X_6 = 2) + \\epsilon\n", + " \"\"\"\n", + " beta = 10.0\n", + " sigma = 10.0 / sigma_factor\n", + " n_samples = 2200\n", + " n_estimators = 125\n", + " test_size = 0.1\n", + "\n", + " rng = np.random.default_rng(seed)\n", + "\n", + " # sample covariates\n", + " X_15 = rng.uniform(0, 1, size=(n_samples, 5))\n", + " X_610 = np.zeros((n_samples, 5))\n", + " for idx in range(5):\n", + " buff = np.argwhere(\n", + " rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=n_samples)\n", + " )[:, 1]\n", + "\n", + " X_610[:, idx] = buff\n", + "\n", + " X = np.concatenate((X_15, X_610), axis=1)\n", + " assert X_15.shape == (n_samples, 5)\n", + " assert X_610.shape == (n_samples, 5)\n", + " assert X.shape == (n_samples, 10)\n", + "\n", + " # sample noise\n", + " epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma)\n", + "\n", + " # compute final y of (n_samples,)\n", + " y = beta * X[:, 0] + (beta * (X[:, 5] - 2)) + epsilon\n", + "\n", + " # initialize hypothesis tester\n", + " est = PermutationForest(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " # bootstrap=True,\n", + " # max_samples=subsample_size\n", + " )\n", + " pvalue_dict = {}\n", + "\n", + " # test for X_1\n", + " stat, pvalue = est.test(X.copy(), y.copy(), [0], n_repeats=100, test_size=test_size)\n", + " print(\"X1: \", pvalue)\n", + " pvalue_dict[\"X1\"] = pvalue\n", + " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", + "\n", + " # test for X_6\n", + " stat, pvalue = est.test(X.copy(), y.copy(), [5], n_repeats=100, test_size=test_size)\n", + " print(\"X6: \", pvalue)\n", + " pvalue_dict[\"X6\"] = pvalue\n", + " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", + "\n", + " # test for a few unimportant other X\n", 
+ " for name, covariate_index in zip([\"X2\", \"X7\"], [1, 6]):\n", + " # test for X_2, X_7\n", + " stat, pvalue = est.test(\n", + " X.copy(), y.copy(), [covariate_index], n_repeats=100, test_size=test_size\n", + " )\n", + " print(\"X2/7: \", pvalue)\n", + " pvalue_dict[name] = pvalue\n", + " # assert pvalue > 0.05, f\"pvalue: {pvalue}\"\n", + "\n", + " return pvalue_dict\n", + "\n", + "\n", + "def linear_model_mars():\n", + " pass\n", + "\n", + "\n", + "def correlated_logit_model():\n", + " pass\n", + "\n", + "\n", + "def random_forest_model():\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 0.7623762376237624\n", + "X6: 0.0891089108910891\n", + "X2/7: 0.8316831683168316\n", + "X2/7: 0.8712871287128713\n", + "X1: 0.9504950495049505\n", + "X6: 1.0\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.693069306930693\n", + "X6: 0.6039603960396039\n", + "X2/7: 0.39603960396039606\n", + "X2/7: 0.594059405940594\n", + "X1: 0.9306930693069307\n", + "X6: 0.9900990099009901\n", + "X2/7: 0.9801980198019802\n", + "X2/7: 1.0\n", + "X1: 0.36633663366336633\n", + "X6: 0.039603960396039604\n", + "X2/7: 0.7623762376237624\n", + "X2/7: 0.9603960396039604\n", + "X1: 0.21782178217821782\n", + "X6: 0.43564356435643564\n", + "X2/7: 0.6237623762376238\n", + "X2/7: 0.24752475247524752\n", + "X1: 0.42574257425742573\n", + "X6: 0.7425742574257426\n", + "X2/7: 0.44554455445544555\n", + "X2/7: 0.1188118811881188\n", + "X1: 0.36633663366336633\n", + "X6: 0.0297029702970297\n", + "X2/7: 0.13861386138613863\n", + "X2/7: 0.1485148514851485\n", + "X1: 0.9405940594059405\n", + "X6: 1.0\n", + "X2/7: 0.9207920792079208\n", + "X2/7: 1.0\n", + "X1: 0.504950495049505\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.5742574257425742\n", + "X2/7: 1.0\n", + "X1: 0.7128712871287128\n", + "X6: 0.2079207920792079\n", + 
"X2/7: 0.9306930693069307\n", + "X2/7: 1.0\n", + "X1: 0.26732673267326734\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.5742574257425742\n", + "X2/7: 0.13861386138613863\n", + "X1: 0.7722772277227723\n", + "X6: 0.900990099009901\n", + "X2/7: 0.9900990099009901\n", + "X2/7: 1.0\n", + "X1: 0.297029702970297\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.6633663366336634\n", + "X2/7: 0.009900990099009901\n", + "X1: 0.9702970297029703\n", + "X6: 0.6336633663366337\n", + "X2/7: 0.9900990099009901\n", + "X2/7: 1.0\n", + "X1: 0.16831683168316833\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.26732673267326734\n", + "X2/7: 0.019801980198019802\n", + "X1: 0.27722772277227725\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.24752475247524752\n", + "X2/7: 0.12871287128712872\n", + "X1: 0.16831683168316833\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.5742574257425742\n", + "X2/7: 0.21782178217821782\n", + "X1: 0.019801980198019802\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.09900990099009901\n", + "X2/7: 0.0594059405940594\n", + "X1: 0.2871287128712871\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.40594059405940597\n", + "X2/7: 1.0\n", + "X1: 0.5346534653465347\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.504950495049505\n", + "X2/7: 0.0891089108910891\n", + "X1: 0.16831683168316833\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.900990099009901\n", + "X2/7: 1.0\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.009900990099009901\n", + "X2/7: 0.009900990099009901\n", + "X1: 0.019801980198019802\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.40594059405940597\n", + "X2/7: 0.039603960396039604\n", + "X1: 0.22772277227722773\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.6534653465346535\n", + "X2/7: 0.9504950495049505\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.009900990099009901\n", + "X2/7: 0.009900990099009901\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 
0.09900990099009901\n", + "X2/7: 0.009900990099009901\n", + "X1: 0.5544554455445545\n", + "X6: 0.009900990099009901\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.019801980198019802\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.38613861386138615\n", + "X2/7: 1.0\n", + "X1: 0.0594059405940594\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.2079207920792079\n", + "X2/7: 0.33663366336633666\n", + "X1: 0.039603960396039604\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.6732673267326733\n", + "X2/7: 0.504950495049505\n", + "X1: 0.2079207920792079\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.7227722772277227\n", + "X2/7: 0.7623762376237624\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.9801980198019802\n", + "X2/7: 1.0\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.039603960396039604\n", + "X2/7: 0.13861386138613863\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.8811881188118812\n", + "X2/7: 0.9504950495049505\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.9405940594059405\n", + "X2/7: 0.45544554455445546\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.1485148514851485\n", + "X2/7: 0.9900990099009901\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.7128712871287128\n", + "X2/7: 0.019801980198019802\n", + "X1: 0.019801980198019802\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.9504950495049505\n", + "X2/7: 1.0\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.8613861386138614\n", + "X2/7: 0.46534653465346537\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.7227722772277227\n", + "X2/7: 0.019801980198019802\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.9504950495049505\n", + "X2/7: 0.8118811881188119\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 
0.297029702970297\n", + "X2/7: 1.0\n", + "X1: 0.009900990099009901\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.7821782178217822\n", + "X2/7: 0.8613861386138614\n", + "X1: 0.0297029702970297\n", + "X6: 0.009900990099009901\n", + "X2/7: 0.9108910891089109\n", + "X2/7: 1.0\n" + ] + } + ], + "source": [ + "pvalue_dict = defaultdict(list)\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "j_space = np.linspace(0.005, 2.25, 9)\n", + "\n", + "for sigma_factor in j_space:\n", + " for idx in range(5):\n", + " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "\n", + " elements_dict = linear_model_ancova(sigma_factor, new_seed)\n", + " for key, value in elements_dict.items():\n", + " pvalue_dict[key].append(value)\n", + " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "\n", + "df = pd.DataFrame(pvalue_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n" + ] + } + ], + "source": [ + "print(\"done\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sktree", + "language": "python", + "name": "sktree" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sktree/conftest.py b/sktree/conftest.py new file mode 100644 index 000000000..037cecae6 --- /dev/null +++ b/sktree/conftest.py @@ -0,0 +1,2 @@ +def pytest_configure(config): + config.addinivalue_line("markers", "slowtest: mark test as slow") diff --git a/sktree/meson.build 
b/sktree/meson.build index 70b709920..8608052b8 100644 --- a/sktree/meson.build +++ b/sktree/meson.build @@ -54,6 +54,7 @@ cython_c_args += numpy_nodepr_api python_sources = [ '__init__.py', 'neighbors.py', + 'conftest.py', ] py3.install_sources( diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index da54ef020..b7301579c 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,2 +1,3 @@ from ._might import MIGHT, MIGHT_MV -from .forestht import ForestHT, HyppoForestRegressor +from .forestht import ForestHT, FeatureImportanceForestRegressor +from .permutationforest import PermutationForestClassifier, PermutationForestRegressor diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 3b8317710..0d88f65df 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -8,46 +8,18 @@ ForestClassifier, ForestRegressor, RandomForestRegressor, + BaseForest ) from sktree._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from ..ensemble import HonestForestClassifier -from .utils import METRIC_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, _pvalue - - -def train_tree( - tree: DecisionTreeClassifier, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, -) -> ArrayLike: - """Compute the posterior from each tree on the "OOB" samples. - - Parameters - ---------- - tree : DecisionTreeClassifier - The tree to compute the posterior from. - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The output matrix. - covariate_index : ArrayLike of shape (n_covariates,), optional - The indices of the covariates to permute, by default None, which - does not permute any columns. - """ - # seed the random number generator using each tree's random seed(?) 
- rng = np.random.default_rng(tree.random_state) - - indices = np.arange(X.shape[0]) - - if covariate_index is not None: - # perform permutation of covariates - index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) - perm_X_cov = X[index_arr, covariate_index] - X[:, covariate_index] = perm_X_cov - - # individual tree permutation of y labels - tree.fit(X, y, check_input=False) +from .utils import ( + METRIC_FUNCTIONS, + REGRESSOR_METRICS, + _compute_null_distribution_coleman, + _pvalue, + train_tree, +) def tree_posterior( @@ -633,11 +605,15 @@ def test( return observe_stat, pval -class HyppoForestRegressor(MetaEstimatorMixin): +class FeatureImportanceForestRegressor(MetaEstimatorMixin): """Forest hypothesis testing with continuous `y` variable. Parameters ---------- + estimator : object, default=None + Type of forest estimator to use. By default `None`, which defaults to + :class:`sklearn.ensemble.RandomForestRegressor`. + n_estimators : int, default=100 The number of trees in the forest. 
@@ -848,7 +824,7 @@ def __init__( def _statistic( self, - estimator: ForestClassifier, + estimator: BaseForest, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, diff --git a/sktree/stats/meson.build b/sktree/stats/meson.build index 75947f98c..0b45a0ada 100644 --- a/sktree/stats/meson.build +++ b/sktree/stats/meson.build @@ -3,6 +3,7 @@ python_sources = [ '_might.py', 'forestht.py', 'utils.py', + 'permutationforest.py', ] py3.install_sources( diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index 37af8eef2..a4ce3cde6 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -1,15 +1,289 @@ import numpy as np -from joblib import Parallel, delayed from numpy.typing import ArrayLike -from sklearn.base import MetaEstimatorMixin -from sklearn.utils.validation import _is_fitted +from sklearn.base import MetaEstimatorMixin, clone, is_classifier +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_X_y -from ..ensemble import HonestForestClassifier -from .utils import METRIC_FUNCTIONS, _pvalue +from sktree._lib.sklearn.ensemble._forest import BaseForest, ForestClassifier, ForestRegressor +from .utils import METRIC_FUNCTIONS, REGRESSOR_METRICS, compute_null_distribution_perm -class PermutationForest(MetaEstimatorMixin): - """Hypothesis testing with a permutation forest. 
+ +class BasePermutationForest(MetaEstimatorMixin): + def __init__( + self, + estimator=None, + n_estimators=100, + criterion="squared_error", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features=1.0, + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None, + **estimators_kwargs, + ): + self.estimator = estimator + self.n_jobs = n_jobs + self.n_estimators = n_estimators + self.criterion = criterion + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_features = max_features + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease + self.bootstrap = bootstrap + self.oob_score = oob_score + self.random_state = random_state + self.verbose = verbose + self.warm_start = warm_start + self.ccp_alpha = ccp_alpha + self.max_samples = max_samples + self.estimator_kwargs = estimators_kwargs + + def reset(self): + class_attributes = dir(type(self)) + instance_attributes = dir(self) + + for attr_name in instance_attributes: + if attr_name.endswith("_") and attr_name not in class_attributes: + delattr(self, attr_name) + + def _get_estimator(self): + pass + + def _statistic( + self, + estimator: BaseForest, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + return_posteriors: bool = False, + seed=None, + **metric_kwargs, + ): + """Helper function to compute the test statistic.""" + metric_func = METRIC_FUNCTIONS[metric] + if seed is None: + rng = np.random.default_rng(self.random_state) + else: + rng = np.random.default_rng(seed) + n_samples = X.shape[0] + indices = np.arange(n_samples, dtype=int) + + if covariate_index is not None: + # perform permutation of covariates + index_arr = 
rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) + X = X.copy() + X[:, covariate_index] = X[index_arr, covariate_index] + + X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] + y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] + + estimator.fit(X_train, y_train.ravel()) + + # Either get the predicted value, or the posterior probabilities + y_pred = estimator.predict(X_test) + + # set variables to compute metric + samples = self.indices_test_ + y_true_final = y_test + posterior_final = y_pred + + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) + + if covariate_index is None: + # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) + # arrays of y and predicted posterior + self.samples_ = samples + self.y_true_final_ = y_true_final + self.posterior_final_ = posterior_final + self.stat_ = stat + + if return_posteriors: + return stat, posterior_final, samples + + return stat + + def statistic( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + return_posteriors: bool = False, + check_input: bool = True, + seed=None, + **metric_kwargs, + ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "mse". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. 
+ posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. + samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. ``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. + """ + if check_input: + X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + self.estimator_ = self._get_estimator() + + if is_classifier(self.estimator_): + if metric not in METRIC_FUNCTIONS: + raise RuntimeError( + f"Metric must be one of {list(METRIC_FUNCTIONS.keys())}, got {metric}" + ) + else: + if metric not in REGRESSOR_METRICS: + raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') + + if covariate_index is None: + estimator = self.estimator_ + else: + self.permuted_estimator_ = clone(self.estimator_) + estimator = self.permuted_estimator_ + + return self._statistic( + estimator, + X, + y, + covariate_index=covariate_index, + metric=metric, + return_posteriors=return_posteriors, + seed=seed, + **metric_kwargs, + ) + + def test( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike, + metric: str = "mse", + test_size: float = 0.2, + n_repeats: int = 1000, + return_posteriors: bool = False, + **metric_kwargs, + ): + """Perform hypothesis test using permutation testing. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike of shape (n_covariates,) + The covariate indices of ``X`` to shuffle. 
+ metric : str, optional + Metric to compute, by default "mse". + test_size : float, optional + Size of the samples to leave out for each tree to compute posteriors on, + by default 0.2. + n_repeats : int, optional + Number of times to sample the null distribution, by default 1000. + + Returns + ------- + observe_stat : float + Observed test statistic. + pvalue : float + p-value of the test. + """ + X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + indices = np.arange(X.shape[0]) + + # train/test split + # XXX: could add stratifying by y when y is classification + indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) + self.indices_train_ = indices_train + self.indices_test_ = indices_test + + if not hasattr(self, "samples_"): + # first compute the test statistic on the un-permuted data + observe_stat, observe_posteriors, observe_samples = self.statistic( + X, + y, + covariate_index=None, + metric=metric, + return_posteriors=True, + check_input=False, + **metric_kwargs, + ) + else: + observe_samples = self.samples_ + observe_posteriors = self.posterior_final_ + observe_stat = self.stat_ + + # compute null distribution of the test statistic + # WARNING: this could take a long time, since it fits a new forest + null_dist = compute_null_distribution_perm( + X_train=X[self.indices_train_, :], + y_train=y[self.indices_train_, :], + X_test=X[self.indices_test_, :], + y_test=y[self.indices_test_, :], + covariate_index=covariate_index, + est=self.estimator_, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + + if not return_posteriors: + self.null_dist_ = np.array(null_dist) + else: + self.null_dist_ = np.array([x[0] for x in null_dist]) + self.posterior_null_ = np.array([x[1] for x in null_dist]) + + n_repeats = len(self.null_dist_) + pvalue = (1 + (self.null_dist_ < observe_stat).sum()) / (1 + n_repeats) + return observe_stat, pvalue + + +class 
PermutationForestRegressor(BasePermutationForest): + """Hypothesis testing of covariates with a permutation forest regressor. This implements permutation testing of a null hypothesis using a random forest. The null hypothesis is generated by permuting ``n_repeats`` times the covariate @@ -17,15 +291,27 @@ class PermutationForest(MetaEstimatorMixin): is compared to the original random forest that was computed on the regular non-permuted data. + .. note:: This does not allow testing on the posteriors. + Parameters ---------- + estimator : object, default=None + Type of forest estimator to use. By default `None`, which defaults to + :class:`sklearn.ensemble.RandomForestRegressor`. + n_estimators : int, default=100 The number of trees in the forest. - criterion : {"gini", "entropy"}, default="gini" - The function to measure the quality of a split. Supported criteria are - "gini" for the Gini impurity and "entropy" for the information gain. - Note: this parameter is tree-specific. + criterion : {"squared_error", "friedman_mse", "absolute_error", \ + "poisson"}, default="squared_error" + The function to measure the quality of a split. Supported criteria + are "squared_error" for the mean squared error, which is equal to + variance reduction as feature selection criterion and minimizes the L2 + loss using the mean of each terminal node, "friedman_mse", which uses + mean squared error with Friedman's improvement score for potential + splits, "absolute_error" for the mean absolute error, which minimizes + the L1 loss using the median of each terminal node, and "poisson" which + uses reduction in Poisson deviance to find splits. splitter : {"best", "random"}, default="best" The strategy used to choose the split at each node. 
Supported @@ -207,14 +493,14 @@ class PermutationForest(MetaEstimatorMixin): def __init__( self, + estimator=None, n_estimators=100, - criterion="gini", - splitter="best", + criterion="squared_error", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, - max_features="sqrt", + max_features=1.0, max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=False, @@ -223,17 +509,14 @@ def __init__( random_state=None, verbose=0, warm_start=False, - class_weight=None, ccp_alpha=0.0, max_samples=None, - honest_prior="empirical", - honest_fraction=0.5, - tree_estimator=None, + **estimators_kwargs, ): - self.estimator = HonestForestClassifier( + super().__init__( + estimator=estimator, n_estimators=n_estimators, criterion=criterion, - splitter=splitter, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, @@ -247,174 +530,306 @@ def __init__( random_state=random_state, verbose=verbose, warm_start=warm_start, - class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples, - honest_prior=honest_prior, - honest_fraction=honest_fraction, - tree_estimator=tree_estimator, + **estimators_kwargs, ) - self.n_jobs = n_jobs - self.n_estimators = n_estimators - self.criterion = criterion - self.splitter = splitter - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.bootstrap = bootstrap - self.oob_score = oob_score - self.random_state = random_state - self.verbose = verbose - self.warm_start = warm_start - self.class_weight = class_weight - self.ccp_alpha = ccp_alpha - self.max_samples = max_samples - self.honest_prior = honest_prior - self.honest_fraction = honest_fraction - self.tree_estimator = tree_estimator - def statistic( - self, - X: 
ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="auc", - test_size=0.2, - return_posteriors: bool = False, - **metric_kwargs, - ): - """Compute the test statistic. + def _get_estimator(self): + if not hasattr(self, "estimator_") and self.estimator is None: + estimator_ = RandomForestRegressor( + n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + bootstrap=self.bootstrap, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose, + warm_start=self.warm_start, + ccp_alpha=self.ccp_alpha, + max_samples=self.max_samples, + **self.estimator_kwargs, + ) + elif not isinstance(self.estimator_, ForestRegressor): + raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") + return estimator_ - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "auc". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - Returns - ------- - stat : float - The test statistic. - posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional - If ``return_posteriors`` is True, then the posterior probabilities of the - samples used in the final test. 
``n_samples_final`` is equal to ``n_samples`` - if all samples are encountered in the test set of at least one tree in the - posterior computation. - samples : ArrayLike of shape (n_samples_final,), optional - The indices of the samples used in the final test. ``n_samples_final`` is - equal to ``n_samples`` if all samples are encountered in the test set of at - least one tree in the posterior computation. - """ - rng = np.random.default_rng(self.random_state) - metric_func = METRIC_FUNCTIONS[metric] +class PermutationForestClassifier(BasePermutationForest): + """Hypothesis testing of covariates with a permutation forest classifier. - # first run a dummy fit on just two samples to initialize the - # internal data structure of the forest - if not _is_fitted(self.estimator): - self.estimator.fit(X[:2], y[:2]) + This implements permutation testing of a null hypothesis using a random forest. + The null hypothesis is generated by permuting ``n_repeats`` times the covariate + indices and then a random forest is trained for each permuted instance. This + is compared to the original random forest that was computed on the regular + non-permuted data. - # Fit each tree and ompute posteriors with train test splits - posterior = Parallel(n_jobs=self.n_jobs)( - delayed(tree_posterior)( - tree, X, y, covariate_index=covariate_index, test_size=test_size - ) - for tree in self.estimator.estimators_ - ) + .. note:: This does not allow testing on the posteriors. + + Parameters + ---------- + estimator : object, default=None + Type of forest estimator to use. By default `None`, which defaults to + :class:`sklearn.ensemble.RandomForestClassifier`. 
- # Average all posteriors - posterior_final = np.nanmean(posterior, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1))[0] - y_true_final = y[samples, :] - posterior_final = posterior_final[samples, :] - stat = metric_func(y_true=y_true_final, y_pred=posterior_final, **metric_kwargs) + n_estimators : int, default=100 + The number of trees in the forest. - # Cache the original test statistic - if covariate_index is None: - self.samples_ = samples - self.y_true_ = y_true_final - self.stat_ = stat - self.posterior_ = posterior_final + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. - if return_posteriors: - return stat, posterior_final, samples - return stat + splitter : {"best", "random"}, default="best" + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. - def test( - self, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike, - metric: str = "auc", - test_size: float = 0.2, - n_repeats: int = 1000, - return_posteriors: bool = False, - **metric_kwargs, - ): - """Perform hypothesis test using permutation testing. + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike of shape (n_covariates,) - The covariate indices of ``X`` to shuffle. - metric : str, optional - Metric to compute, by default "auc". - test_size : float, optional - Size of the samples to leave out for each tree to compute posteriors on, - by default 0.2. 
- n_repeats : int, optional - Number of times to sample the null distribution, by default 1000. + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: - Returns - ------- - observe_stat : float - Observed test statistic. - pvalue : float - p-value of the test. - """ - # compute original test statistic - observe_stat = self.statistic(X, y, covariate_index, metric, test_size, **metric_kwargs) + - If int, then consider `min_samples_split` as the minimum number. + - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. - # compute null distribution of the test statistic - # WARNING: this could take a long time, since it fits a new forest - null_dist = np.array( - Parallel(n_jobs=self.n_jobs)( - [ - delayed(self.statistic)( - self, - X, - y, - covariate_index, - metric, - test_size, - return_posteriors, - **metric_kwargs, - ) - for _ in range(n_repeats) - ] - ) + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. 
+ + max_features : {"sqrt", "log2", None}, int or float, default="sqrt" + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + max_leaf_nodes : int, default=None + Grow trees with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. + + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. + + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + bootstrap : bool, default=True + Whether bootstrap samples are used when building trees. If False, the + whole dataset is used to build each tree. + + oob_score : bool, default=False + Whether to use out-of-bag samples to estimate the generalization score. + Only available if bootstrap=True. + + n_jobs : int, default=None + The number of jobs to run in parallel. 
:meth:`fit`, :meth:`predict`, + :meth:`decision_path` and :meth:`apply` are all parallelized over the + trees. ``None`` means 1 unless in a `joblib.parallel_backend` + context. ``-1`` means using all processors. See :term:`Glossary + <n_jobs>` for more details. + + random_state : int, RandomState instance or None, default=None + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary <random_state>` for details. + + verbose : int, default=0 + Controls the verbosity when fitting and predicting. + + warm_start : bool, default=False + When set to ``True``, reuse the solution of the previous call to fit + and add more estimators to the ensemble, otherwise, just fit a whole + new forest. See :term:`the Glossary <warm_start>`. + + class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ + default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + The "balanced_subsample" mode is the same as "balanced" except that + weights are computed based on the bootstrap sample for every tree + grown. + + For multi-output, the weights of each column of y will be multiplied. 
+ + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + ccp_alpha : non-negative float, default=0.0 + Complexity parameter used for Minimal Cost-Complexity Pruning. The + subtree with the largest cost complexity that is smaller than + ``ccp_alpha`` will be chosen. By default, no pruning is performed. See + :ref:`minimal_cost_complexity_pruning` for details. + + max_samples : int or float, default=None + If bootstrap is True, the number of samples to draw from X + to train each base tree estimator. + + - If None (default), then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0.0, 1.0]`. + + honest_prior : {"ignore", "uniform", "empirical"}, default="empirical" + Method for dealing with empty leaves during evaluation of a test + sample. If "ignore", the tree is ignored. If "uniform", the prior tree + posterior is 1/(number of classes). If "empirical", the prior tree + posterior is the relative class frequency in the voting subsample. + If all trees are ignored, the empirical estimate is returned. + + honest_fraction : float, default=0.5 + Fraction of training samples used for estimates in the trees. The + remaining samples will be used to learn the tree structure. A larger + fraction creates shallower trees with lower variance estimates. + + tree_estimator : object, default=None + Type of decision tree classifier to use. By default `None`, which + defaults to :class:`sklearn.tree.DecisionTreeClassifier`. + + Attributes + ---------- + samples_ : ArrayLike of shape (n_samples,) + The indices of the samples used in the final test. + + y_true_ : ArrayLike of shape (n_samples_final,) + The true labels of the samples used in the final test. 
+ + posterior_ : ArrayLike of shape (n_samples_final, n_outputs) + The predicted posterior probabilities of the samples used in the final test. + + null_dist_ : ArrayLike of shape (n_repeats,) + The null distribution of the test statistic. + + posterior_null_ : ArrayLike of shape (n_samples_final, n_outputs, n_repeats) + The posterior probabilities of the samples used in the final test for each + permutation for the null distribution. + """ + + def __init__( + self, + estimator=None, + n_estimators=100, + criterion="gini", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None, + **estimators_kwargs, + ): + super().__init__( + estimator=estimator, + n_estimators=n_estimators, + criterion=criterion, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + ccp_alpha=ccp_alpha, + max_samples=max_samples, + **estimators_kwargs, ) - if not return_posteriors: - self.null_dist_ = null_dist - else: - self.null_dist_ = null_dist[0] - self.posterior_null_ = null_dist[1] - pvalue = _pvalue(observe_stat=observe_stat, permuted_stat=null_dist, correction=True) - return observe_stat, pvalue + def _get_estimator(self): + if not hasattr(self, "estimator_") and self.estimator is None: + estimator_ = RandomForestClassifier( + n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + 
min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + bootstrap=self.bootstrap, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose, + warm_start=self.warm_start, + ccp_alpha=self.ccp_alpha, + max_samples=self.max_samples, + **self.estimator_kwargs, + ) + elif not isinstance(self.estimator_, ForestRegressor): + raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") + return estimator_ diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 3c4e644eb..c212eae33 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -4,8 +4,8 @@ from sklearn import datasets from sktree._lib.sklearn.tree import DecisionTreeClassifier -from sktree.stats import MIGHT -from sktree.stats.forestht import ForestHT, HyppoForestRegressor +from sktree.stats import MIGHT, PermutationForestClassifier, PermutationForestRegressor +from sktree.stats.forestht import ForestHT from sktree.tree import ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier # load the iris dataset @@ -33,24 +33,22 @@ def test_forestht_proper_attributes(): pass -def test_iris(): - pass - - -def test_linear_model(): - r"""Test MIGHT using MSE from linear model simulation. +@pytest.mark.slowtest +@pytest.mark.parametrize("hypotester", [PermutationForestRegressor]) +def test_linear_model(hypotester): + r"""Test hypothesis testing forests using MSE from linear model simulation. See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon """ - # j = np.linspace(0.005, 2.25, 9)[] beta = 10.0 - sigma = 0.05 # / j - n_samples = 2500 - n_estimators = 125 + sigma = 0.5 + n_samples = 550 + n_estimators = 50 test_size = 0.1 - # subsample_size = 0.8 + n_repeats = 50 + metric = "mse" rng = np.random.default_rng(seed) @@ -61,7 +59,7 @@ def test_linear_model(): X_610[:, idx] = np.argwhere( rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) )[:, 1] - X = np.concatenate((X_15, X_610), axis=1, dtype=np.float32) + X = np.concatenate((X_15, X_610), axis=1) assert X.shape == (n_samples, 10) # sample noise @@ -69,51 +67,55 @@ def test_linear_model(): # compute final y of (n_samples,) y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon - est = HyppoForestRegressor( + est = hypotester( max_features=1.0, random_state=seed, n_estimators=n_estimators, n_jobs=-1, - permute_per_tree=False, - # bootstrap=True, max_samples=subsample_size ) # test for X_1 - stat, pvalue = est.test(X, y, [0], test_size=test_size) - print(pvalue) - # assert pvalue < 0.05, f"pvalue: {pvalue}" + stat, pvalue = est.test(X, y, [0], metric=metric, test_size=test_size, n_repeats=n_repeats) + print("X1: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" # test for X_6 - stat, pvalue = est.test(X, y, [5], test_size=test_size) - print(pvalue) - # assert pvalue < 0.05, f"pvalue: {pvalue}" + stat, pvalue = est.test(X, y, [5], metric=metric, test_size=test_size, n_repeats=n_repeats) + print("X6: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" # test for a few unimportant other X for covariate_index in [1, 6]: - # test for X_2, X_3, X_4 - stat, pvalue = est.test(X, y, [covariate_index], test_size=test_size) - print(pvalue) - # assert pvalue > 0.05, f"pvalue: {pvalue}" - - assert False + # test for X_2, X_7 + stat, pvalue = est.test( + X, y, [covariate_index], metric=metric, test_size=test_size, n_repeats=n_repeats + ) + print("X2/7: ", pvalue) + assert pvalue > 0.05, f"pvalue: 
{pvalue}" -def test_correlated_logit_model(): +@pytest.mark.slowtest +@pytest.mark.parametrize("hypotester", [PermutationForestClassifier]) +def test_correlated_logit_model(hypotester): r"""Test MIGHT using MSE from linear model simulation. See https://arxiv.org/pdf/1904.07830.pdf Figure 1. P(Y = 1 | X) = expit(beta * \\sum_{j=2}^5 X_j) """ - beta = 15.0 + beta = 5.0 n_samples = 600 - n_estimators = 125 + n_estimators = 50 n_jobs = -1 + max_features = "sqrt" + test_size = 1.0 / 6 + metric = "mse" + n_repeats = 50 + + n = 200 # Number of time steps + ar_coefficient = 0.0015 - n = 100 # Number of time steps - ar_coefficient = 0.015 rng = np.random.default_rng(seed) - test_size = 0.5 X = np.zeros((n_samples, n)) for idx in range(n_samples): @@ -135,22 +137,30 @@ def test_correlated_logit_model(): assert y_proba.shape == (n_samples,) y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) - est = ForestHT(max_features=n, random_state=seed, n_estimators=n_estimators, n_jobs=n_jobs) + est = hypotester( + max_features=max_features, random_state=seed, n_estimators=n_estimators, n_jobs=n_jobs + ) # test for X_2 important - stat, pvalue = est.test(X, y, [1], test_size=test_size, metric="mse") - print(pvalue) - assert pvalue < 0.6, f"pvalue: {pvalue}" + stat, pvalue = est.test( + X.copy(), y.copy(), [1], test_size=test_size, n_repeats=n_repeats, metric=metric + ) + print("X2: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" - # test for X_1 - stat, pvalue = est.test(X, y, [0], metric="mse") - print(pvalue) - assert pvalue > 0.9, f"pvalue: {pvalue}" + # test for X_1 unimportant + stat, pvalue = est.test( + X.copy(), y.copy(), [0], test_size=test_size, n_repeats=n_repeats, metric=metric + ) + print("X1: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" - # test for X_500 - stat, pvalue = est.test(X, y, [n - 1], metric="mse") - print(pvalue) - assert pvalue > 0.9, f"pvalue: {pvalue}" + # test for X_500 unimportant + stat, pvalue = est.test( + X.copy(), 
y.copy(), [n - 1], test_size=test_size, n_repeats=n_repeats, metric=metric + ) + print("X500: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" @pytest.mark.parametrize("criterion", ["gini", "entropy"]) diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 94754bfb0..3791a388f 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -3,10 +3,16 @@ import numpy as np from numpy.typing import ArrayLike from scipy.stats import entropy -from sklearn.metrics import mean_absolute_error, mean_squared_error, roc_auc_score +from sklearn.metrics import ( + balanced_accuracy_score, + mean_absolute_error, + mean_squared_error, + roc_auc_score, +) from sklearn.utils.validation import check_X_y from sktree._lib.sklearn.ensemble._forest import ForestClassifier +from sktree._lib.sklearn.tree import DecisionTreeClassifier def _mutual_information(y_true, y_pred): @@ -21,15 +27,50 @@ def _mutual_information(y_true, y_pred): "mae": mean_absolute_error, "auc": roc_auc_score, "mi": _mutual_information, + "balanced_accuracy": balanced_accuracy_score, } REGRESSOR_METRICS = ("mse", "mae") +def train_tree( + tree: DecisionTreeClassifier, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, +) -> ArrayLike: + """Fit a tree on the data, optionally permuting covariate columns of ``X`` in place. + + Parameters + ---------- + tree : DecisionTreeClassifier + The tree to fit. + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The output matrix. + covariate_index : ArrayLike of shape (n_covariates,), optional + The indices of the covariates to permute, by default None, which + does not permute any columns. + """ + # seed the random number generator using each tree's random seed(?) 
+ rng = np.random.default_rng(tree.random_state) + + indices = np.arange(X.shape[0]) + + if covariate_index is not None: + # perform permutation of covariates + index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) + perm_X_cov = X[index_arr, covariate_index] + X[:, covariate_index] = perm_X_cov + + # fit the tree on the (possibly covariate-permuted) data + tree.fit(X, y, check_input=False) + + def _pvalue(observe_stat: float, permuted_stat: ArrayLike, correction: bool = True) -> float: - """Compute pvalue with Coleman method. + """Compute pvalue. - Implements the pvalue calculation from Algorithm 1. See - :footcite:`coleman2022scalable` for full details. + Implements the pvalue calculation, optionally with a correction factor. Parameters ---------- @@ -47,13 +88,15 @@ def _pvalue(observe_stat: float, permuted_stat: ArrayLike, correction: bool = Tr """ n_repeats = len(permuted_stat) if correction: - pval = (1 + (permuted_stat >= observe_stat).sum()) / (1 + n_repeats) + pval = (1 + (permuted_stat < observe_stat).sum()) / (1 + n_repeats) else: - pval = (permuted_stat >= observe_stat).sum() / n_repeats + pval = (permuted_stat < observe_stat).sum() / n_repeats return pval def compute_null_distribution_perm( + X_train: ArrayLike, + y_train: ArrayLike, X_test: ArrayLike, y_test: ArrayLike, covariate_index: ArrayLike, @@ -82,29 +125,37 @@ def compute_null_distribution_perm( Random seed, by default None. 
""" rng = np.random.default_rng(seed) - X_test, y_test = check_X_y(X_test, y_test, ensure_2d=True) - n_samples = len(y_test) - + X_test, y_test = check_X_y(X_test, y_test, ensure_2d=True, multi_output=True) + n_samples_test = len(y_test) + n_samples_train = len(y_train) metric_func = METRIC_FUNCTIONS[metric] # pre-allocate memory for the index array - index_arr = np.arange(n_samples * 2, dtype=int) + train_index_arr = np.arange(n_samples_train, dtype=int).reshape(-1, 1) + test_index_arr = np.arange(n_samples_test, dtype=int).reshape(-1, 1) + + X = np.concatenate((X_train, X_test), axis=0) + index_arr = np.arange(X.shape[0], dtype=int) # .reshape(-1, 1) null_metrics = np.zeros((n_repeats,)) for idx in range(n_repeats): # permute the covariates inplace - rng.shuffle(index_arr) - perm_X_cov = X_test[index_arr, covariate_index] + rng.shuffle(test_index_arr) + perm_X_cov = X_test[test_index_arr, covariate_index] X_test[:, covariate_index] = perm_X_cov + rng.shuffle(train_index_arr) + perm_X_cov = X_train[train_index_arr, covariate_index] + X_train[:, covariate_index] = perm_X_cov + # train a new forest on the permuted data # XXX: should there be a train/test split here? even w/ honest forests? - est.fit(X_test, y_test) - y_pred_proba = est.predict_proba(X_test) + est.fit(X_train, y_train.ravel()) + y_pred = est.predict(X_test) # compute two instances of the metric from the sampled trees - metric_val = metric_func(y_true=y_test, y_pred=y_pred_proba) + metric_val = metric_func(y_test, y_pred) null_metrics[idx] = metric_val return null_metrics diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py index 4e9bc279f..1fd839ab7 100644 --- a/sktree/tree/_classes.py +++ b/sktree/tree/_classes.py @@ -575,7 +575,7 @@ def _build_tree( class ObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier): - """A decision tree classifier. + """An oblique decision tree classifier. Read more in the :ref:`User Guide `. 
The implementation follows that of :footcite:`breiman2001random` and :footcite:`TomitaSPORF2020`. From 3d666af74011b47287809a2b65846d63aa03139c Mon Sep 17 00:00:00 2001 From: Adam Li Date: Sun, 17 Sep 2023 12:08:08 -0400 Subject: [PATCH 15/70] Upload notebook Signed-off-by: Adam Li --- .codespellignore | 3 +- .../test_permutation_forest.ipynb | 628 ++++++++++++------ sktree/meson.build | 2 +- sktree/stats/__init__.py | 2 +- sktree/stats/_might.py | 2 +- sktree/stats/forestht.py | 377 ++++++----- sktree/stats/permutationforest.py | 4 +- sktree/stats/tests/test_forestht.py | 85 ++- sktree/stats/tests/test_might.py | 2 +- sktree/stats/utils.py | 84 +-- 10 files changed, 777 insertions(+), 412 deletions(-) diff --git a/.codespellignore b/.codespellignore index 909bd6252..41359f0fb 100644 --- a/.codespellignore +++ b/.codespellignore @@ -1,4 +1,5 @@ raison nd parth -ot \ No newline at end of file +ot +fpr \ No newline at end of file diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 6cd8626d7..d0e72e4d4 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, "outputs": [], @@ -14,17 +14,26 @@ "import pandas as pd\n", "import seaborn as sns\n", "\n", - "from sktree.stats import HyppoForestRegressor, PermutationForest\n", + "from sktree.stats import FeatureImportanceForestRegressor, PermutationForestRegressor\n", "\n", "seed = 12345" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 18, "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. 
To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -32,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 20, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -49,6 +58,10 @@ " n_samples = 2200\n", " n_estimators = 125\n", " test_size = 0.1\n", + " n_repeats = 500\n", + " metric = \"mse\"\n", + " permute_per_tree = True\n", + " sample_dataset_per_tree = True\n", "\n", " rng = np.random.default_rng(seed)\n", "\n", @@ -74,24 +87,30 @@ " y = beta * X[:, 0] + (beta * (X[:, 5] - 2)) + epsilon\n", "\n", " # initialize hypothesis tester\n", - " est = PermutationForest(\n", + " est = FeatureImportanceForestRegressor(\n", " max_features=1.0,\n", " random_state=seed,\n", " n_estimators=n_estimators,\n", " n_jobs=-1,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", " # bootstrap=True,\n", " # max_samples=subsample_size\n", " )\n", " pvalue_dict = {}\n", "\n", " # test for X_1\n", - " stat, pvalue = est.test(X.copy(), y.copy(), [0], n_repeats=100, test_size=test_size)\n", + " stat, pvalue = est.test(\n", + " X.copy(), y.copy(), [0], metric=metric, n_repeats=n_repeats, test_size=test_size\n", + " )\n", " print(\"X1: \", pvalue)\n", " pvalue_dict[\"X1\"] = pvalue\n", " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", "\n", " # test for X_6\n", - " stat, pvalue = est.test(X.copy(), y.copy(), [5], n_repeats=100, test_size=test_size)\n", + " stat, pvalue = est.test(\n", + " X.copy(), y.copy(), [5], metric=metric, n_repeats=n_repeats, test_size=test_size\n", + " )\n", " print(\"X6: \", pvalue)\n", " pvalue_dict[\"X6\"] = pvalue\n", " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", @@ -100,7 +119,12 @@ " for name, covariate_index in zip([\"X2\", \"X7\"], [1, 6]):\n", " # test for X_2, X_7\n", " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [covariate_index], n_repeats=100, 
test_size=test_size\n", + " X.copy(),\n", + " y.copy(),\n", + " [covariate_index],\n", + " metric=metric,\n", + " n_repeats=n_repeats,\n", + " test_size=test_size,\n", " )\n", " print(\"X2/7: \", pvalue)\n", " pvalue_dict[name] = pvalue\n", @@ -123,7 +147,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 25, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true @@ -133,185 +157,185 @@ "name": "stdout", "output_type": "stream", "text": [ - "X1: 0.7623762376237624\n", - "X6: 0.0891089108910891\n", - "X2/7: 0.8316831683168316\n", - "X2/7: 0.8712871287128713\n", - "X1: 0.9504950495049505\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.001996007984031936\n", + "X2/7: 0.001996007984031936\n", + "X1: 1.0\n", + "X6: 1.0\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", "X6: 1.0\n", "X2/7: 1.0\n", "X2/7: 1.0\n", - "X1: 0.693069306930693\n", - "X6: 0.6039603960396039\n", - "X2/7: 0.39603960396039606\n", - "X2/7: 0.594059405940594\n", - "X1: 0.9306930693069307\n", - "X6: 0.9900990099009901\n", - "X2/7: 0.9801980198019802\n", - "X2/7: 1.0\n", - "X1: 0.36633663366336633\n", - "X6: 0.039603960396039604\n", - "X2/7: 0.7623762376237624\n", - "X2/7: 0.9603960396039604\n", - "X1: 0.21782178217821782\n", - "X6: 0.43564356435643564\n", - "X2/7: 0.6237623762376238\n", - "X2/7: 0.24752475247524752\n", - "X1: 0.42574257425742573\n", - "X6: 0.7425742574257426\n", - "X2/7: 0.44554455445544555\n", - "X2/7: 0.1188118811881188\n", - "X1: 0.36633663366336633\n", - "X6: 0.0297029702970297\n", - "X2/7: 0.13861386138613863\n", - "X2/7: 0.1485148514851485\n", - "X1: 0.9405940594059405\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 1.0\n", "X6: 1.0\n", - "X2/7: 0.9207920792079208\n", - "X2/7: 1.0\n", - "X1: 0.504950495049505\n", - "X6: 0.009900990099009901\n", - "X2/7: 
0.5742574257425742\n", - "X2/7: 1.0\n", - "X1: 0.7128712871287128\n", - "X6: 0.2079207920792079\n", - "X2/7: 0.9306930693069307\n", - "X2/7: 1.0\n", - "X1: 0.26732673267326734\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.5742574257425742\n", - "X2/7: 0.13861386138613863\n", - "X1: 0.7722772277227723\n", - "X6: 0.900990099009901\n", - "X2/7: 0.9900990099009901\n", - "X2/7: 1.0\n", - "X1: 0.297029702970297\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.6633663366336634\n", - "X2/7: 0.009900990099009901\n", - "X1: 0.9702970297029703\n", - "X6: 0.6336633663366337\n", - "X2/7: 0.9900990099009901\n", - "X2/7: 1.0\n", - "X1: 0.16831683168316833\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.26732673267326734\n", - "X2/7: 0.019801980198019802\n", - "X1: 0.27722772277227725\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.24752475247524752\n", - "X2/7: 0.12871287128712872\n", - "X1: 0.16831683168316833\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.5742574257425742\n", - "X2/7: 0.21782178217821782\n", - "X1: 0.019801980198019802\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.09900990099009901\n", - "X2/7: 0.0594059405940594\n", - "X1: 0.2871287128712871\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.40594059405940597\n", - "X2/7: 1.0\n", - "X1: 0.5346534653465347\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.504950495049505\n", - "X2/7: 0.0891089108910891\n", - "X1: 0.16831683168316833\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.900990099009901\n", - "X2/7: 1.0\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.009900990099009901\n", - "X2/7: 0.009900990099009901\n", - "X1: 0.019801980198019802\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.40594059405940597\n", - "X2/7: 0.039603960396039604\n", - "X1: 0.22772277227722773\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.6534653465346535\n", - "X2/7: 0.9504950495049505\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.009900990099009901\n", - "X2/7: 
0.009900990099009901\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.09900990099009901\n", - "X2/7: 0.009900990099009901\n", - "X1: 0.5544554455445545\n", - "X6: 0.009900990099009901\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 0.019801980198019802\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.38613861386138615\n", - "X2/7: 1.0\n", - "X1: 0.0594059405940594\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.2079207920792079\n", - "X2/7: 0.33663366336633666\n", - "X1: 0.039603960396039604\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.6732673267326733\n", - "X2/7: 0.504950495049505\n", - "X1: 0.2079207920792079\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.7227722772277227\n", - "X2/7: 0.7623762376237624\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.9801980198019802\n", - "X2/7: 1.0\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.039603960396039604\n", - "X2/7: 0.13861386138613863\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.8811881188118812\n", - "X2/7: 0.9504950495049505\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.9405940594059405\n", - "X2/7: 0.45544554455445546\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.1485148514851485\n", - "X2/7: 0.9900990099009901\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.7128712871287128\n", - "X2/7: 0.019801980198019802\n", - "X1: 0.019801980198019802\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.9504950495049505\n", - "X2/7: 1.0\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.8613861386138614\n", - "X2/7: 0.46534653465346537\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.7227722772277227\n", - "X2/7: 0.019801980198019802\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.9504950495049505\n", - "X2/7: 
0.8118811881188119\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.297029702970297\n", - "X2/7: 1.0\n", - "X1: 0.009900990099009901\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.7821782178217822\n", - "X2/7: 0.8613861386138614\n", - "X1: 0.0297029702970297\n", - "X6: 0.009900990099009901\n", - "X2/7: 0.9108910891089109\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 1.0\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 1.0\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 1.0\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 
0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 
0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 1.0\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", "X2/7: 1.0\n" ] } @@ -336,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 26, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, "outputs": [ @@ -354,9 +378,239 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X1X6X2X7sigma_factor
01.0000000.0019961.0000001.0000000.005
10.0019960.0019960.0019960.0019960.005
21.0000001.0000001.0000001.0000000.005
31.0000001.0000001.0000001.0000000.005
41.0000000.0019961.0000000.0019960.005
\n", + "
" + ], + "text/plain": [ + " X1 X6 X2 X7 sigma_factor\n", + "0 1.000000 0.001996 1.000000 1.000000 0.005\n", + "1 0.001996 0.001996 0.001996 0.001996 0.005\n", + "2 1.000000 1.000000 1.000000 1.000000 0.005\n", + "3 1.000000 1.000000 1.000000 1.000000 0.005\n", + "4 1.000000 0.001996 1.000000 0.001996 0.005" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(df.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9e60fac2-3b20-493e-886a-892d572a28c6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with Coleman Forest (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7c99ce8c-a32d-447b-9dd2-85c8d310239f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\"Linear ANCOVA model with Coleman Forest (Permutation per tree)\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "67846a66-1817-46c8-9ccc-5281773c4f92", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\"Linear ANCOVA model with Coleman Forest\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f772759-751d-440c-abcb-13f3ee6f7705", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "7cf30622-ffff-4d00-b474-0ac49fcfde4b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\"Linear ANCOVA model with Permutation Forest\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34294429-04f3-4b12-baf3-fa6fdc11646f", + "metadata": {}, "outputs": [], "source": [] } diff --git a/sktree/meson.build b/sktree/meson.build index 8608052b8..b593f75aa 100644 --- a/sktree/meson.build +++ b/sktree/meson.build @@ -54,7 +54,7 @@ cython_c_args += numpy_nodepr_api python_sources = [ '__init__.py', 'neighbors.py', - 'conftest.py', + # 'conftest.py', ] py3.install_sources( diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index b7301579c..0061cd66a 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,3 +1,3 @@ from ._might import MIGHT, MIGHT_MV -from .forestht import ForestHT, FeatureImportanceForestRegressor +from .forestht import FeatureImportanceForestRegressor, ForestHT from .permutationforest import PermutationForestClassifier, PermutationForestRegressor diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index ff3d9ac2b..d88630215 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -304,7 +304,7 @@ def test_diff(self, x, z, y, reps=1000, workers=1): X_permutedZ = np.hstack((x, permuted_Z)) perm_stat, perm_pos = self.statistic(X_permutedZ, y, return_pos=True) - # Boostrap sample the posterior from the two forests + # Bootsrap sample the posterior from the two forests null_stats = np.array( Parallel(n_jobs=workers)( 
[delayed(pos_diff)(observe_pos, perm_pos, y, limit=self.limit) for _ in range(reps)] diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 0d88f65df..3ad77c8f8 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -5,83 +5,22 @@ from sklearn.utils.validation import _is_fitted, check_X_y from sktree._lib.sklearn.ensemble._forest import ( + BaseForest, ForestClassifier, ForestRegressor, RandomForestRegressor, - BaseForest ) -from sktree._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sktree._lib.sklearn.tree import DecisionTreeRegressor from ..ensemble import HonestForestClassifier from .utils import ( METRIC_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, - _pvalue, train_tree, ) -def tree_posterior( - tree: DecisionTreeClassifier, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - test_size: float = 0.2, - seed=None, -) -> ArrayLike: - """Compute the posterior from each tree on the "OOB" samples. - - Parameters - ---------- - tree : DecisionTreeClassifier - The tree to compute the posterior from. - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The output matrix. - covariate_index : ArrayLike of shape (n_covariates,), optional - The indices of the covariates to permute, by default None, which - does not permute any columns. - test_size : float, optional - The size of the OOB set of samples, by default 0.2. - seed : int, optional - Random seed, by default None. - - Returns - ------- - posterior : ArrayLike of shape (n_samples, n_outputs) - The predicted posterior probabilities for each OOB sample from the tree. - For any in-bag samples, the posterior is NaN. - """ - # seed the random number generator using each tree's random seed(?) 
- rng = np.random.default_rng(tree.random_state) - - indices = np.arange(X.shape[0]) - - if covariate_index is not None: - # perform permutation of covariates - index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) - perm_X_cov = X[index_arr, covariate_index] - X[:, covariate_index] = perm_X_cov - - # XXX: we can replace this using Forest's generator for the in-bag/oob sample indices when - # https://github.com/scikit-learn/scikit-learn/pull/26736 is merged - # X_train, X_test, y_train, _, _, indices_test = train_test_split( - # X, y, indices, test_size=test_size - # ) - - # individual tree permutation of y labels - tree.fit(X, y, check_input=False) - # y_pred = tree.predict_proba(X_test)[:, 1] - - # Fill test set posteriors & set rest NaN - # posterior = np.full((y.shape[0], tree.n_outputs_), np.nan) - # posterior[indices_test] = y_pred.reshape(-1, tree.n_outputs_) - - # return posterior - - class ForestHT(MetaEstimatorMixin): """Forest hypothesis testing. 
@@ -605,9 +544,73 @@ def test( return observe_stat, pval -class FeatureImportanceForestRegressor(MetaEstimatorMixin): +class BaseForestHT(MetaEstimatorMixin): + def __init__( + self, + estimator=None, + n_estimators=100, + criterion="squared_error", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None, + permute_per_tree=True, + **estimator_kwargs, + ): + self.estimator = estimator + self.n_jobs = n_jobs + self.n_estimators = n_estimators + self.criterion = criterion + self.max_depth = max_depth + self.min_samples_split = min_samples_split + self.min_samples_leaf = min_samples_leaf + self.min_weight_fraction_leaf = min_weight_fraction_leaf + self.max_features = max_features + self.max_leaf_nodes = max_leaf_nodes + self.min_impurity_decrease = min_impurity_decrease + self.bootstrap = bootstrap + self.oob_score = oob_score + self.random_state = random_state + self.verbose = verbose + self.warm_start = warm_start + self.ccp_alpha = ccp_alpha + self.max_samples = max_samples + self.estimator_kwargs = estimator_kwargs + self.permute_per_tree = permute_per_tree + + def reset(self): + class_attributes = dir(type(self)) + instance_attributes = dir(self) + + for attr_name in instance_attributes: + if attr_name.endswith("_") and attr_name not in class_attributes: + delattr(self, attr_name) + + +class FeatureImportanceForestRegressor(BaseForestHT): """Forest hypothesis testing with continuous `y` variable. + The dataset is split into a training and testing dataset initially. Then there + are two forests that are trained: one on the original dataset, and one on the + permuted dataset. The dataset is either permuted once, or independently for + each tree in the permuted forest. 
The original test statistic is computed by + comparing the metric on both forests ``(metric_forest - metric_perm_forest)``. + + Then the output predictions are randomly sampled to recompute the test statistic + ``n_repeats`` times. The p-value is computed as the proportion of times the + null test statistic is greater than the original test statistic. + Parameters ---------- estimator : object, default=None @@ -801,26 +804,60 @@ def __init__( ccp_alpha=0.0, max_samples=None, permute_per_tree=True, + sample_dataset_per_tree=False, + **estimator_kwargs, ): - self.estimator = estimator - self.n_jobs = n_jobs - self.n_estimators = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.bootstrap = bootstrap - self.oob_score = oob_score - self.random_state = random_state - self.verbose = verbose - self.warm_start = warm_start - self.ccp_alpha = ccp_alpha - self.max_samples = max_samples + super().__init__( + estimator=estimator, + n_estimators=n_estimators, + criterion=criterion, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + ccp_alpha=ccp_alpha, + max_samples=max_samples, + **estimator_kwargs, + ) self.permute_per_tree = permute_per_tree + self.sample_dataset_per_tree = sample_dataset_per_tree + + def _get_estimator(self): + if self.estimator is None: + estimator_ = RandomForestRegressor( + n_estimators=self.n_estimators, + 
criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + bootstrap=self.bootstrap, + oob_score=self.oob_score, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose, + warm_start=self.warm_start, + ccp_alpha=self.ccp_alpha, + max_samples=self.max_samples, + **self.estimator_kwargs, + ) + elif isinstance(self.estimator, ForestRegressor): + raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator)}") + else: + estimator_ = self.estimator + return estimator_ def _statistic( self, @@ -829,7 +866,6 @@ def _statistic( y: ArrayLike, covariate_index: ArrayLike = None, metric="mse", - test_size=0.2, return_posteriors: bool = False, **metric_kwargs, ): @@ -837,9 +873,36 @@ def _statistic( metric_func = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) n_samples = X.shape[0] - indices = np.arange(n_samples, dtype=int) - if self.permute_per_tree: + if self.permute_per_tree and not self.sample_dataset_per_tree: + # first run a dummy fit on the samples to initialize the + # internal data structure of the forest + if not _is_fitted(estimator): + unique_y = np.unique(y) + X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) + + # Fit each tree and compute posteriors with train test splits + n_samples_test = len(self.indices_test_) + + # now initialize posterior array as (n_trees, n_samples_test, n_outputs) + posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_outputs_)) + for idx in range(self.n_estimators): + tree: DecisionTreeRegressor = estimator.estimators_[idx] + train_tree( + tree, X[self.indices_train_, :], y[self.indices_train_, :], covariate_index + ) + + y_pred = 
tree.predict(X[self.indices_test_, :]).reshape(-1, tree.n_outputs_) + + # Fill test set posteriors & set rest NaN + posterior_arr[idx, ...] = y_pred # posterior + + y_true_final = y[self.indices_test_, :] + # Average all posteriors + posterior_final = np.nanmean(posterior_arr, axis=0) + samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() + elif self.permute_per_tree and self.sample_dataset_per_tree: # first run a dummy fit on the samples to initialize the # internal data structure of the forest if not _is_fitted(estimator): @@ -847,38 +910,49 @@ def _statistic( X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) estimator.fit(X_dummy, unique_y) + # now initialize posterior array as (n_trees, n_samples, n_outputs) + posterior_arr = np.full((self.n_estimators, n_samples, estimator.n_outputs_), np.nan) # Fit each tree and compute posteriors with train test splits - posterior_arr = np.zeros((self.n_estimators, n_samples, estimator.n_outputs_)) for idx in range(self.n_estimators): - seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) + # sample train/test dataset for each tree indices_train, indices_test = train_test_split( - indices, test_size=test_size, stratify=y, shuffle=True, random_state=seed + np.arange(n_samples, dtype=int), + test_size=self.test_size_, + shuffle=True, + random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), ) tree: DecisionTreeRegressor = estimator.estimators_[idx] train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) - y_pred = tree.predict(X[indices_test, :]) + y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) - # Fill test set posteriors & set rest NaN - posterior = np.full((y.shape[0], tree.n_outputs_), np.nan) - posterior[indices_test, :] = y_pred - posterior_arr[idx, ...] 
= posterior + posterior_arr[idx, indices_test, :] = y_pred # posterior # Average all posteriors posterior_final = np.nanmean(posterior_arr, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() - y_true_final = y[samples, :] - posterior_final = posterior_final[samples, :] - else: - if covariate_index is not None: - print("Permuting the covariate...") - # perform permutation of covariates - index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) - X[:, covariate_index] = X[index_arr, covariate_index] + # Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + + # Ignore all NaN values (samples not tested) + y_true_final = y[nonnan_indices, :] + posterior_final = posterior_final[nonnan_indices, :] + samples = nonnan_indices + else: X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] + if covariate_index is not None: + # perform permutation of covariates + n_samples_train = X_train.shape[0] + index_arr = rng.choice( + np.arange(n_samples_train, dtype=int), + size=(n_samples_train, 1), + replace=False, + shuffle=True, + ) + X_train[:, covariate_index] = X_train[index_arr, covariate_index] + estimator.fit(X_train, y_train) y_pred = estimator.predict(X_test) @@ -887,10 +961,7 @@ def _statistic( y_true_final = y_test posterior_final = y_pred - # print('Y true: ', y_true_final) - # print('posterior: ', posterior_final) stat = metric_func(y_true_final, posterior_final, **metric_kwargs) - if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) # arrays of y and predicted posterior @@ -904,21 +975,12 @@ def _statistic( return stat - def reset(self): - class_attributes = dir(type(self)) - instance_attributes = dir(self) - - for attr_name in instance_attributes: - if attr_name.endswith("_") and attr_name not in 
class_attributes: - delattr(self, attr_name) - def statistic( self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, metric="mse", - test_size=0.2, return_posteriors: bool = False, check_input: bool = True, **metric_kwargs, @@ -962,30 +1024,8 @@ def statistic( if metric not in REGRESSOR_METRICS: raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') - if not hasattr(self, "estimator_") and self.estimator is None: - self.estimator_ = RandomForestRegressor( - n_estimators=self.n_estimators, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - max_features=self.max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_impurity_decrease=self.min_impurity_decrease, - bootstrap=self.bootstrap, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - random_state=self.random_state, - verbose=self.verbose, - warm_start=self.warm_start, - ccp_alpha=self.ccp_alpha, - max_samples=self.max_samples, - ) - elif not isinstance(self.estimator_, ForestRegressor): - raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") - if covariate_index is None: + self.estimator_ = self._get_estimator() estimator = self.estimator_ else: self.permuted_estimator_ = clone(self.estimator_) @@ -997,7 +1037,6 @@ def statistic( y, covariate_index=covariate_index, metric=metric, - test_size=test_size, return_posteriors=return_posteriors, **metric_kwargs, ) @@ -1054,14 +1093,18 @@ def test( y = y.reshape(-1, 1) indices = np.arange(X.shape[0]) - if not self.permute_per_tree: - # train/test split - # XXX: could add stratifying by y when y is classification - indices_train, indices_test = train_test_split( - indices, test_size=test_size, shuffle=True - ) - self.indices_train_ = indices_train - self.indices_test_ = indices_test + self.test_size_ = int(test_size * X.shape[0]) + # if not 
self.permute_per_tree: + # # train/test split + # # XXX: could add stratifying by y when y is classification + # indices_train, indices_test = train_test_split( + # indices, test_size=test_size, shuffle=True + # ) + # self.indices_train_ = indices_train + # self.indices_test_ = indices_test + indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) + self.indices_train_ = indices_train + self.indices_test_ = indices_test if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data @@ -1070,7 +1113,6 @@ def test( y, covariate_index=None, metric=metric, - test_size=test_size, return_posteriors=True, check_input=False, **metric_kwargs, @@ -1086,7 +1128,6 @@ def test( y, covariate_index=covariate_index, metric=metric, - test_size=test_size, return_posteriors=True, check_input=False, **metric_kwargs, @@ -1095,26 +1136,32 @@ def test( # Note: at this point, both `estimator` and `permuted_estimator_` should # have been fitted already, so we can now compute on the null by resampling # the posteriors and computing the test statistic on the resampled posteriors - metric_star, metric_star_pi = _compute_null_distribution_coleman( - X_test=X, - y_test=y, - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, - normal_samples=observe_samples, - perm_samples=permute_samples, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) - # print(observe_posteriors) - # print(permute_posteriors) - # metric^\pi - metric + if self.sample_dataset_per_tree: + metric_star, metric_star_pi = _compute_null_distribution_coleman( + y_test=y[observe_samples, :], + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + else: + metric_star, metric_star_pi = _compute_null_distribution_coleman( + y_test=y[self.indices_test_, :], + y_pred_proba_normal=observe_posteriors, + 
y_pred_proba_perm=permute_posteriors, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + # metric^\pi - metric = observed test statistic, which under the null is normally distributed around 0 observe_stat = permute_stat - observe_stat - # metric^\pi_j - metric_j + # metric^\pi_j - metric_j, which is centered at 0 null_dist = metric_star_pi - metric_star - pval = _pvalue(observe_stat=observe_stat, permuted_stat=null_dist, correction=True) + # compute pvalue + pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) if return_posteriors: self.observe_posteriors_ = observe_posteriors @@ -1123,4 +1170,4 @@ def test( self.permute_samples_ = permute_samples self.null_dist_ = null_dist - return observe_stat, pval + return observe_stat, pvalue diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index a4ce3cde6..fb9c6a98b 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -7,7 +7,7 @@ from sktree._lib.sklearn.ensemble._forest import BaseForest, ForestClassifier, ForestRegressor -from .utils import METRIC_FUNCTIONS, REGRESSOR_METRICS, compute_null_distribution_perm +from .utils import METRIC_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_perm class BasePermutationForest(MetaEstimatorMixin): @@ -259,7 +259,7 @@ def test( # compute null distribution of the test statistic # WARNING: this could take a long time, since it fits a new forest - null_dist = compute_null_distribution_perm( + null_dist = _compute_null_distribution_perm( X_train=X[self.indices_train_, :], y_train=y[self.indices_train_, :], X_test=X[self.indices_test_, :], diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index c212eae33..93e2138f8 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -4,9 +4,13 @@ from sklearn import datasets from sktree._lib.sklearn.tree import DecisionTreeClassifier -from sktree.stats import MIGHT, 
PermutationForestClassifier, PermutationForestRegressor +from sktree.stats import ( + FeatureImportanceForestRegressor, + PermutationForestClassifier, + PermutationForestRegressor, +) from sktree.stats.forestht import ForestHT -from sktree.tree import ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier +from sktree.tree import ObliqueDecisionTreeClassifier # load the iris dataset # and randomly permute it @@ -34,7 +38,13 @@ def test_forestht_proper_attributes(): @pytest.mark.slowtest -@pytest.mark.parametrize("hypotester", [PermutationForestRegressor]) +@pytest.mark.parametrize( + "hypotester", + [ + # PermutationForestRegressor, + FeatureImportanceForestRegressor + ], +) def test_linear_model(hypotester): r"""Test hypothesis testing forests using MSE from linear model simulation. @@ -94,6 +104,75 @@ def test_linear_model(hypotester): assert pvalue > 0.05, f"pvalue: {pvalue}" +@pytest.mark.slowtest +@pytest.mark.parametrize( + "hypotester", + [ + # PermutationForestRegressor, + FeatureImportanceForestRegressor + ], +) +def test_linear_model_withcoleman(hypotester): + r"""Test hypothesis testing forests using MSE from linear model simulation. + + See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
+ + Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon + """ + beta = 10.0 + sigma = 0.5 + n_samples = 600 + n_estimators = 125 + test_size = 0.1 + n_repeats = 200 + permute_per_tree = True + metric = "mse" + + rng = np.random.default_rng(seed) + + # sample covariates + X_15 = rng.uniform(0, 1, size=(n_samples, 5)) + X_610 = np.zeros((n_samples, 5)) + for idx in range(5): + X_610[:, idx] = np.argwhere( + rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) + )[:, 1] + X = np.concatenate((X_15, X_610), axis=1) + assert X.shape == (n_samples, 10) + + # sample noise + epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma) + + # compute final y of (n_samples,) + y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon + est = hypotester( + max_features=1.0, + random_state=seed, + n_estimators=n_estimators, + permute_per_tree=permute_per_tree, + n_jobs=-1, + ) + + # test for X_1 + stat, pvalue = est.test(X, y, [0], metric=metric, test_size=test_size, n_repeats=n_repeats) + print("X1: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for X_6 + stat, pvalue = est.test(X, y, [5], metric=metric, test_size=test_size, n_repeats=n_repeats) + print("X6: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for a few unimportant other X + for covariate_index in [1, 6]: + # test for X_2, X_7 + stat, pvalue = est.test( + X, y, [covariate_index], metric=metric, test_size=test_size, n_repeats=n_repeats + ) + print("X2/7: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" + + @pytest.mark.slowtest @pytest.mark.parametrize("hypotester", [PermutationForestClassifier]) def test_correlated_logit_model(hypotester): diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py index 825137217..ff613c571 100644 --- a/sktree/stats/tests/test_might.py +++ b/sktree/stats/tests/test_might.py @@ -4,7 +4,7 @@ from sktree._lib.sklearn.tree import DecisionTreeClassifier from sktree.stats import MIGHT -from sktree.tree import 
ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier +from sktree.tree import ObliqueDecisionTreeClassifier # load the iris dataset # and randomly permute it diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 3791a388f..c0a988520 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -15,8 +15,8 @@ from sktree._lib.sklearn.tree import DecisionTreeClassifier -def _mutual_information(y_true, y_pred): - H_YX = np.mean(entropy(y_pred, base=np.exp(1))) +def _mutual_information(y_true, y_pred_proba): + H_YX = np.mean(entropy(y_pred_proba, base=np.exp(1), axis=1)) _, counts = np.unique(y_true, return_counts=True) H_Y = entropy(counts, base=np.exp(1)) return max(H_Y - H_YX, 0) @@ -55,11 +55,11 @@ def train_tree( # seed the random number generator using each tree's random seed(?) rng = np.random.default_rng(tree.random_state) - indices = np.arange(X.shape[0]) + indices = np.arange(X.shape[0], dtype=int) if covariate_index is not None: # perform permutation of covariates - index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) + index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=True) perm_X_cov = X[index_arr, covariate_index] X[:, covariate_index] = perm_X_cov @@ -67,34 +67,7 @@ def train_tree( tree.fit(X, y, check_input=False) -def _pvalue(observe_stat: float, permuted_stat: ArrayLike, correction: bool = True) -> float: - """Compute pvalue. - - Implements the pvalue calculation from optionally with a correction factor. - - Parameters - ---------- - observe_stat : float - The observed test statistic. - permuted_stat : ArrayLike of shape (n_repeats,) - The array of test statistics computed on permutations. - correction : bool - Whether to use correction and add 1 to the numerator and denominator, by default True. - - Returns - ------- - pval : float - The pvalue. 
- """ - n_repeats = len(permuted_stat) - if correction: - pval = (1 + (permuted_stat < observe_stat).sum()) / (1 + n_repeats) - else: - pval = (permuted_stat < observe_stat).sum() / n_repeats - return pval - - -def compute_null_distribution_perm( +def _compute_null_distribution_perm( X_train: ArrayLike, y_train: ArrayLike, X_test: ArrayLike, @@ -135,8 +108,6 @@ def compute_null_distribution_perm( test_index_arr = np.arange(n_samples_test, dtype=int).reshape(-1, 1) X = np.concatenate((X_train, X_test), axis=0) - index_arr = np.arange(X.shape[0], dtype=int) # .reshape(-1, 1) - null_metrics = np.zeros((n_repeats,)) for idx in range(n_repeats): @@ -162,12 +133,9 @@ def compute_null_distribution_perm( def _compute_null_distribution_coleman( - X_test: ArrayLike, y_test: ArrayLike, y_pred_proba_normal: ArrayLike, y_pred_proba_perm: ArrayLike, - normal_samples: ArrayLike, - perm_samples: ArrayLike, metric: str = "mse", n_repeats: int = 1000, seed: int = None, @@ -209,33 +177,49 @@ def _compute_null_distribution_coleman( metric_func = METRIC_FUNCTIONS[metric] - # sample two sets of equal number of trees from the combined forest + # sample two sets of equal number of trees from the combined forest these are the posteriors all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0) - # get the indices of the samples that we have a posterior for, so each element - # is an index into `y_test` - all_samples_pred = np.concatenate((normal_samples, perm_samples), axis=0) + n_samples_test = len(y_test) + assert len(all_y_pred) == 2 * n_samples_test + + # create two stacked index arrays of y_test resulting in [1, ..., N, 1, ..., N] + y_test_ind_arr = np.hstack( + (np.arange(n_samples_test, dtype=int), np.arange(n_samples_test, dtype=int)) + ) - n_samples_final = len(all_samples_pred) + # create index array of [1, ..., 2N] to slice into `all_y_pred` + y_pred_ind_arr = np.arange((2 * n_samples_test), dtype=int) + + # # get the indices of the samples that we have a 
posterior for, so each element + # # is an index into `y_test` + # all_samples_pred = np.concatenate((normal_samples, perm_samples), axis=0) + + # n_samples_final = len(all_samples_pred) # pre-allocate memory for the index array - index_arr = np.arange(n_samples_final, dtype=int) + # index_arr = np.arange(n_samples_final, dtype=int) metric_star = np.zeros((n_repeats,)) metric_star_pi = np.zeros((n_repeats,)) for idx in range(n_repeats): # two sets of random indices from 1 : 2N are sampled using Fisher-Yates - rng.shuffle(index_arr) - first_half_index = index_arr[: n_samples_final // 2] - second_half_index = index_arr[n_samples_final // 2 :] + rng.shuffle(y_pred_ind_arr) + + first_forest_inds = y_pred_ind_arr[:n_samples_test] + second_forest_inds = y_pred_ind_arr[n_samples_test:] + + # index into y_test for first half and second half + first_half_index_test = y_test_ind_arr[first_forest_inds] + second_half_index_test = y_test_ind_arr[second_forest_inds] # now get the pointers to the actual samples used for the metric - y_test_first_half = y_test[all_samples_pred[first_half_index]] - y_test_second_half = y_test[all_samples_pred[second_half_index]] + y_test_first_half = y_test[first_half_index_test] + y_test_second_half = y_test[second_half_index_test] # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_test_first_half, all_y_pred[first_half_index]) - second_half_metric = metric_func(y_test_second_half, all_y_pred[second_half_index]) + first_half_metric = metric_func(y_test_first_half, all_y_pred[first_forest_inds]) + second_half_metric = metric_func(y_test_second_half, all_y_pred[second_forest_inds]) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric From aab5df304790174dfc8f04100c437608218f6bb7 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Mon, 18 Sep 2023 10:06:36 -0400 Subject: [PATCH 16/70] FIX correct MI calculation for MIGHT 2-class --- sktree/stats/_might.py | 4 +++- 1 file changed, 3
insertions(+), 1 deletion(-) diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index d88630215..102dc1448 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -155,7 +155,9 @@ def statistic( posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit ) elif stat == "MI": - H_YX = np.mean(entropy(posterior_final[:, 1], base=np.exp(1))) + class_zero = (1 - posterior_final[:, 1]).reshape(-1, 1) + full_class = np.hstack((class_zero, posterior_final[:, 1].reshape(-1, 1))) + H_YX = np.mean(entropy(full_class, base=np.exp(1), axis=1)) _, counts = np.unique(posterior_final[:, 0], return_counts=True) H_Y = entropy(counts, base=np.exp(1)) self.stat = max(H_Y - H_YX, 0) From 972fe95547f04083c0c9f8e597a6a1fb219264bf Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 18 Sep 2023 10:09:48 -0400 Subject: [PATCH 17/70] Correlated logit model Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 585 ++++++++++++++---- sktree/stats/__init__.py | 2 +- sktree/stats/forestht.py | 578 ++++++++++++++++- 3 files changed, 1030 insertions(+), 135 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index d0e72e4d4..12a0e6c0e 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 17, + "execution_count": 15, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, "outputs": [], @@ -13,27 +13,23 @@ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", + "from scipy.special import expit\n", "\n", - "from sktree.stats import FeatureImportanceForestRegressor, PermutationForestRegressor\n", + "from sktree.stats import (\n", + " FeatureImportanceForestClassifier,\n", + " FeatureImportanceForestRegressor,\n", + " PermutationForestRegressor,\n", + ")\n", "\n", "seed = 12345" ] }, { "cell_type": "code", - "execution_count": 18, + 
"execution_count": 2, "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -41,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -137,14 +133,112 @@ " pass\n", "\n", "\n", - "def correlated_logit_model():\n", - " pass\n", - "\n", - "\n", "def random_forest_model():\n", " pass" ] }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3db4f740-afd9-413e-8089-a8245f2a0747", + "metadata": {}, + "outputs": [], + "source": [ + "def correlated_logit_model(beta=5.0, seed=None):\n", + " r\"\"\"Test MIGHT using MSE from linear model simulation.\n", + "\n", + " See https://arxiv.org/pdf/1904.07830.pdf Figure 1.\n", + "\n", + " P(Y = 1 | X) = expit(beta * \\\\sum_{j=2}^5 X_j)\n", + " \"\"\"\n", + " n_samples = 600\n", + " n_estimators = 125\n", + " n_jobs = -1\n", + " max_features = \"sqrt\"\n", + " test_size = 1.0 / 6\n", + " metric = \"mse\"\n", + " n_repeats = 200\n", + " permute_per_tree = True\n", + " sample_dataset_per_tree = True\n", + "\n", + " n = 500 # Number of time steps\n", + " ar_coefficient = 0.15\n", + "\n", + " rng = np.random.default_rng(seed)\n", + "\n", + " X = np.zeros((n_samples, n))\n", + " for idx in range(n_samples):\n", + " # sample covariates\n", + " white_noise = rng.standard_normal(size=n)\n", + "\n", + " # Create an array to store the simulated AR(1) time series\n", + " ar1_series = np.zeros(n)\n", + " ar1_series[0] = white_noise[0]\n", + "\n", + " # Simulate the AR(1) process\n", + " for t in range(1, n):\n", + " ar1_series[t] = ar_coefficient * ar1_series[t - 1] + white_noise[t]\n", + "\n", + " X[idx, :] = ar1_series\n", + "\n", + " # now compute the 
output labels\n", + " y_proba = expit(beta * X[:, 1:5].sum(axis=1))\n", + " assert y_proba.shape == (n_samples,)\n", + " y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1)\n", + "\n", + " pvalue_dict = {}\n", + "\n", + " # initialize hypothesis tester\n", + " est = FeatureImportanceForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", + " # bootstrap=True,\n", + " # max_samples=subsample_size\n", + " )\n", + "\n", + " # test for X_2 important\n", + " stat, pvalue = est.test(\n", + " X.copy(), y.copy(), [1], test_size=test_size, n_repeats=n_repeats, metric=metric\n", + " )\n", + " pvalue_dict[\"X2\"] = pvalue\n", + " print(\"X2: \", pvalue)\n", + " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", + "\n", + " # test for X_1 unimportant\n", + " stat, pvalue = est.test(\n", + " X.copy(), y.copy(), [0], test_size=test_size, n_repeats=n_repeats, metric=metric\n", + " )\n", + " pvalue_dict[\"X1\"] = pvalue\n", + " print(\"X1: \", pvalue)\n", + " # assert pvalue > 0.05, f\"pvalue: {pvalue}\"\n", + "\n", + " # test for X_500 unimportant\n", + " stat, pvalue = est.test(\n", + " X.copy(),\n", + " y.copy(),\n", + " [n - 1],\n", + " test_size=test_size,\n", + " n_repeats=n_repeats,\n", + " metric=metric,\n", + " )\n", + " pvalue_dict[\"X500\"] = pvalue\n", + " print(\"X500: \", pvalue)\n", + " # assert pvalue > 0.05, f\"pvalue: {pvalue}\"\n", + " return pvalue_dict" + ] + }, + { + "cell_type": "markdown", + "id": "36acd19f-0e66-455e-b6e4-288e1c9d020c", + "metadata": {}, + "source": [ + "# Run Experiment on Linear ANCOVA Model" + ] + }, { "cell_type": "code", "execution_count": 25, @@ -350,7 +444,7 @@ " for idx in range(5):\n", " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", "\n", - " elements_dict = linear_model_ancova(sigma_factor, new_seed)\n", + " elements_dict = 
correlated_logit_model(beta, new_seed)\n", " for key, value in elements_dict.items():\n", " pvalue_dict[key].append(value)\n", " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", @@ -360,7 +454,310 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, + "id": "9e60fac2-3b20-493e-886a-892d572a28c6", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with Coleman Forest (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "7c99ce8c-a32d-447b-9dd2-85c8d310239f", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\"Linear ANCOVA model with Coleman Forest (Permutation per tree)\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "67846a66-1817-46c8-9ccc-5281773c4f92", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\"Linear ANCOVA model with Coleman Forest\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "7cf30622-ffff-4d00-b474-0ac49fcfde4b", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxYAAAJQCAYAAAATyPJiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVfoH8O8t02t6IwVI6AgISlEEFEVQ7G1FRda1uxbEXXvB3mVdsbv4U1TEui4qKs1CEWlKJ5CQQHqZ3u89vz8mGQlpk2QmMwnv53nyQGbuzJwpOXPfc877Ho4xxkAIIYQQQgghXcDHugGEEEIIIYSQno8CC0IIIYQQQkiXUWBBCCGEEEII6TIKLAghhBBCCCFdRoEFIYQQQgghpMsosCCEEEIIIYR0GQUWhBBCCCGEkC6jwIIQQgghhBDSZRRYEEIIIYQQQrqMAgtCIqS4uBgcx2HRokWxbgrp5bryWVu9ejU4jsPq1asj3i4AmDx5MiZPnhz2scOGDYtKO0hzV199NfLy8mLdDEJIL0aBBSFhWLRoETiOw2+//RbrpnSLf/zjH+A4DpdeemmL1zee2HIch08//bTZ9Q8//DA4jkNNTU2z61avXo0LLrgA6enpUCqVSE1NxcyZM/HZZ581O7a2thZ33XUXBg4cCLVajcTEREybNg3/+9//mhx3zjnnQKvVwm63t/qcZs2aBaVSidra2tBlFosFarUaHMdh165drd6WdF5ZWRkefvhhbN26NeL3nZeXF/occhyH1NRUTJw4EZ9//nnEH6u7LFy4sEuDE9F8vTvryP7i6J9x48bFunkt+uCDD/DSSy/FuhmE9DhirBtASG+Rm5sLt9sNhUIR66Z0CWMMH374IfLy8vDVV1/BbrfDYDC0evz8+fNxwQUXgOO4du/7oYcewvz581FQUIDrr78eubm5qK2txddff40LL7wQixcvxuWXXw4A2LNnD0477TRUV1djzpw5GDNmDCwWCxYvXoyZM2di3rx5ePbZZwEEg4avvvoKn3/+Oa666qpmj+tyufDll1/izDPPRFJSUujypUuXguM4pKenY/HixXjsscc6+nKRo3z33XdNfi8rK8MjjzyCvLw8jBw5MuKPN3LkSNx5552hx3r99ddxwQUX4NVXX8UNN9wQ8ceLtoULFyI5ORlXX311p27f1uv95ptvQpblrjeyk/7
yl79gxowZTS5LSUmJUWva9sEHH2D79u24/fbbY90UQnoUCiwIiRCO46BWq2PdjDa5XC5otdo2j1m9ejUOHTqElStXYtq0afjss88we/bsFo8dOXIktm7dis8//xwXXHBBm/f7ySefYP78+bjooovwwQcfNAnA7rrrLixfvhx+vx8A4Pf7cdFFF6G+vh4//vgjxo4dGzr2jjvuwKxZs/Dcc89hzJgxuPTSS3HOOefAYDDggw8+aDGw+PLLL+F0OjFr1qwml7///vuYMWMGcnNz8cEHH1BgEQFKpbJbHy8rKwtXXHFF6PerrroK+fn5ePHFF7scWHg8HiiVSvB875jcj/Wgx/HHH9/kvYqU3vY+EdKT0V8hIRHS0rr3q6++Gnq9HocPH8Z5550HvV6PlJQUzJs3D5IkNbm9LMt46aWXMHToUKjVaqSlpeH6669HfX19k+O+/PJLnHXWWcjMzIRKpUL//v3x6KOPNru/xvXrmzZtwimnnAKtVot777233eexePFiDBkyBFOmTMHUqVOxePHiVo+97LLLMGDAAMyfPx+MsTbv94EHHkBiYiLeeeedFk9wpk2bhrPPPhsA8Omnn2L79u24++67mwQVACAIAl5//XWYzWY8/PDDAACNRoMLLrgAK1asQFVVVbP7/uCDD2AwGHDOOeeELispKcFPP/2Eyy67DJdddhmKioqwdu3aNp9Do8alXnv37sUVV1wBk8mElJQUPPDAA2CMobS0FOeeey6MRiPS09Px/PPPN7uPqqoqXHPNNUhLS4NarcaIESPw7rvvNjvOYrHg6quvhslkgtlsxuzZs2GxWFps1+7du3HRRRchMTERarUaY8aMwX//+9+wntORfv/9d3Ac1+S2mzZtAsdxOP7445scO3369Cbv0ZE5FqtXr8YJJ5wAAJgzZ05o+cvRS3127tyJKVOmQKvVIisrC88880yH29woPT0dgwcPRlFRUeiyw4cP469//SvS0tKgUqkwdOhQvPPOO01u15h78tFHH+H+++9HVlYWtFotbDZb6O+4pKQEZ599NvR6PbKysvDKK68AAP744w+ceuqp0Ol0oSD1SI2fl6M1LrEsLi4GEFzatWPHDqxZsyb0WjW+lnV1dZg3bx6GDx8OvV4Po9GI6dOnY9u2bU2eQ1uvd0s5Fk6nE3feeSeys7OhUqkwcOBAPPfcc83+njmOwy233IIvvvgCw4YNC72O3377bXhvTBgOHDiAiy++GImJidBqtRg3bhyWLVvW5Ji23icA2LBhA84880yYTCZotVpMmjQJv/zyS5P7sNvtuP3225GXlweVSoXU1FScfvrp2Lx5M4DgZ3jZsmU4ePBg6DWk3BRCwkOBBSFRJkkSpk2bhqSkJDz33HOYNGkSnn/+ebzxxhtNjrv++utx11134aSTTsKCBQswZ84cLF68GNOmTQuN5APBkxG9Xo+5c+diwYIFGD16NB588EHcfffdzR67trYW06dPx8iRI/HSSy9hypQpbbbV6/Xi008/xV/+8hcAwaULK1euREVFRYvHC4KA+++/H9u2bWtzXfu+ffuwe/dunHfeeW0uq2r01VdfAUCLsw8AYDKZcO6552L37t0oLCwEEFwOFQgE8PHHHzc5tq6uDsuXL8f5558PjUYTuvzDDz+ETqfD2WefjRNPPBH9+/dvM4hqyaWXXgpZlvHUU09h7NixeOyxx/DSSy/h9NNPR1ZWFp5++mnk5+dj3rx5+PHHH0O3c7vdmDx5Mt577z3MmjULzz77LEwmE66++mosWLAgdBxjDOeeey7ee+89XHHFFXjsscdw6NChFmeQduzYgXHjxmHXrl24++678fzzz0On0+G8887rcM7BsGHDYDabm7T5p59+As/z2LZtW+gkTpZlrF27FqecckqL9zN48GDMnz8fAHDdddfhvffew3vvvdfk+Pr6epx55pkYMWIEnn/+eQwaNAj//Oc/8c0333SozY38fj9KS0tDS94qKysxbtw
4/PDDD7jllluwYMEC5Ofn45prrmlxDf2jjz6KZcuWYd68eXjiiSdCMzCSJGH69OnIzs7GM888g7y8PNxyyy1YtGgRzjzzTIwZMwZPP/00DAYDrrrqqiaBTbheeukl9OnTB4MGDQq9Vvfddx+A4En3F198gbPPPhsvvPAC7rrrLvzxxx+YNGkSysrKAIT3eh+JMYZzzjkHL774Is4880y88MILGDhwIO666y7MnTu32fE///wzbrrpJlx22WV45pln4PF4cOGFFzbJW2qLy+VCTU1Nk5/Gvq2yshITJkzA8uXLcdNNN+Hxxx+Hx+PBOeec0+Lnt6X3aeXKlTjllFNgs9nw0EMP4YknnoDFYsGpp56KX3/9NXTbG264Aa+++iouvPBCLFy4EPPmzYNGownlWd13330YOXIkkpOTQ68h5VsQEiZGCGnXf/7zHwaAbdy4sdVjioqKGAD2n//8J3TZ7NmzGQA2f/78JseOGjWKjR49OvT7Tz/9xACwxYsXNznu22+/bXa5y+Vq9tjXX38902q1zOPxhC6bNGkSA8Bee+21sJ/nJ598wgCwffv2McYYs9lsTK1WsxdffLHF5/rss8+yQCDACgoK2IgRI5gsy4wxxh566CEGgFVXVzPGGPvyyy8ZgGb305qRI0cyk8nU5jEvvPACA8D++9//MsYYCwQCLCMjg40fP77Jca+99hoDwJYvX97k8uHDh7NZs2aFfr/33ntZcnIy8/v97bav8fldd911ocsCgQDr06cP4ziOPfXUU6HL6+vrmUajYbNnzw5d9tJLLzEA7P333w9d5vP52Pjx45ler2c2m40xxtgXX3zBALBnnnmmyeNMnDix2WfttNNOY8OHD2/yGZBlmU2YMIEVFBSELlu1ahUDwFatWtXmczzrrLPYiSeeGPr9ggsuYBdccAETBIF98803jDHGNm/ezACwL7/8MnTcpEmT2KRJk0K/b9y4sVlbjzwWAPu///u/0GVer5elp6ezCy+8sM32McZYbm4uO+OMM1h1dTWrrq5m27ZtY5dddhkDwP7+978zxhi75pprWEZGBqupqWly28suu4yZTKbQ31Pj69KvX79mf2ONf8dPPPFE6LLG95XjOPbRRx+FLt+9ezcDwB566KHQZY2fl6M19itFRUWhy4YOHdrk9Wvk8XiYJElNLisqKmIqlapJ/9LW6z179myWm5sb+r3x8/XYY481Oe6iiy5iHMexwsLC0GUAmFKpbHLZtm3bGAD28ssvN3uso9sJoMWfxs/h7bffzgCwn376KXQ7u93O+vbty/Ly8kLPvbX3SZZlVlBQwKZNmxbqhxgL9pd9+/Zlp59+eugyk8nEbr755jbbfNZZZzV5rQgh4aEZC0K6wdFrvSdOnIgDBw6Efl+6dClMJhNOP/30JqN5o0ePhl6vx6pVq0LHHjnqbrfbUVNTg4kTJ8LlcmH37t1NHkelUmHOnDlht3Px4sUYM2YM8vPzAQAGgwFnnXVWmyP5R85afPHFFy0e0zjCHc5sBYB2E8aPvK/G+xYEAZdddhnWrVsXWloCBJdBpaWl4bTTTgtd9vvvv+OPP/4IzcwAwdmZmpoaLF++PKw2AsDf/va30P8FQcCYMWPAGMM111wTutxsNmPgwIFN3u+vv/4a6enpTR5foVDg1ltvhcPhwJo1a0LHiaKIG2+8scnj/P3vf2/Sjrq6OqxcuRKXXHJJ6DNRU1OD2tpaTJs2Dfv27cPhw4fDfl5A8DO6efNmOJ1OAMHR6hkzZmDkyJH46aefAARnMTiOw8knn9yh+z6SXq9vsu5eqVTixBNPbPJ6teW7775DSkoKUlJSMGLECCxduhRXXnklnn76aTDG8Omnn2LmzJlgjDX525o2bRqsVmto+Uuj2bNnN/kbO9KR73fj+6rT6XDJJZeELh84cCDMZnPY7Q+XSqUK5RBIkoTa2lro9XoMHDiw2XMI19dffw1BEHDrrbc2ufzOO+8EY6zZrNHUqVPRv3/
/0O/HHXccjEZj2M/1uuuuw/fff9/kZ8SIEaG2nHjiiU0+S3q9Htdddx2Ki4uxc+fOJvd19Pu0detW7Nu3D5dffjlqa2tD77PT6cRpp52GH3/8MZS4bjabsWHDhtBMDyEkcih5m5AoU6vVzSqfJCQkNMmd2LdvH6xWK1JTU1u8jyPzBnbs2IH7778fK1euDJ1UN7JarU1+z8rKCjuZ1mKx4Ouvv8Ytt9wSWl4EACeddBI+/fRT7N27FwMGDGjxtrNmzcKjjz6K+fPn47zzzmt2vdFoBIA2y8EeyWAwtFiq9kiN93VkADJr1iy8+OKL+OCDD3Dvvffi0KFD+Omnn3DrrbdCEITQce+//z50Oh369esXeq5qtRp5eXlYvHgxzjrrrLDamZOT0+R3k8kEtVqN5OTkZpcfuVzk4MGDKCgoaJZsOnjw4ND1jf9mZGRAr9c3OW7gwIFNfi8sLARjDA888AAeeOCBFttaVVWFrKyssJ4XEAwsAoEA1q1bh+zsbFRVVWHixInYsWNHk8BiyJAhSExMDPt+j9anT59m+QcJCQn4/fffw7p94xI0juOg1WoxePBgmM1mAMHnbLFY8MYbbzRbetjo6Jycvn37tnhcS3/HJpOpxfabTKZmuVFdJcsyFixYgIULF6KoqKhJTtWRlc464uDBg8jMzGwWxB/9OWx09OcdaN6XtaWgoABTp05ttS1H51Md3ZYj9zw5+n3at28fALRaaAII9o8JCQl45plnMHv2bGRnZ2P06NGYMWMGrrrqKvTr1y+s50EIaR0FFoRE2ZEntK2RZRmpqamtzgw0ntBYLBZMmjQJRqMR8+fPR//+/aFWq7F582b885//bFZKsrWR15YsXboUXq8Xzz//fIvJxosXL8YjjzzS4m0bZy2uvvpqfPnll82uHzRoEIBgkms4Bg8ejK1bt6KkpKTFkxkAoRPPIUOGhC4bPXo0Bg0ahA8//BD33nsvPvzwQzDGmlSDYg3ldJ1OZ5PbNqqqqoLD4Wh2Mt+Slt7b1t5v1k5ye1c0vu/z5s3DtGnTWjymcRYqXGPGjIFarcaPP/6InJwcpKamYsCAAZg4cSIWLlwIr9eLn376Ceeff36X2t7V1ys5ObnVk9XG1+WKK65o9YTzuOOOa/J7a38zrbUznPa3Vor56IILbXniiSfwwAMP4K9//SseffRRJCYmgud53H777d1WQjYWn+3WHP0+Nb4Gzz77bKtljRv/pi+55JLQfiffffcdnn32WTz99NP47LPPMH369Ki2m5DejgILQuJA//798cMPP+Ckk05qMxhYvXo1amtr8dlnnzVJyOxMoujRFi9ejGHDhuGhhx5qdt3rr7+ODz74oNXAAkAoufiRRx5pUn0JAAYMGICBAwfiyy+/xIIFC9o9aT/77LPx4Ycf4v/+7/9w//33N7veZrPhyy+/xKBBg5qdMM+aNQsPPPAAfv/9d3zwwQcoKCgIVcoBgDVr1uDQoUOYP39+aDS0UX19Pa677jp88cUXUSmL2Sg3Nxe///47ZFluMmvRuJQtNzc39O+KFSuaBTp79uxpcn+NI60KhaLVk+yOalyS9NNPPyEnJwcTJ04EEJzJ8Hq9WLx4MSorK1tNDG4Uzv4m0ZKSkgKDwQBJkiL2unRGQkICgODAQONsCtB8RgBo/fX65JNPMGXKFLz99ttNLrdYLE1myDryeufm5uKHH35otvTw6M9hd8jNzW32ue5IWxqXaBmNxrDe64yMDNx000246aabUFVVheOPPx6PP/54KLCI5eeWkJ6MciwIiQOXXHIJJEnCo48+2uy6QCAQKi/aOGJ45Aihz+fDwoULu/T4paWl+PHHH3HJJZfgoosuavYzZ84cFBYWYsOGDa3eR+OsxdatW1sscfrII4+gtrYWf/vb3xAIBJpd/91334V21L7oooswZMgQPPXUU812O5dlGTfeeCPq6+tbDIIaZycefPBBbN26tcW9K3Q6He66665mz/Paa69FQUF
Bh6tDddSMGTNQUVGBJUuWhC4LBAJ4+eWXodfrMWnSpNBxgUAAr776aug4SZLw8ssvN7m/1NRUTJ48Ga+//jrKy8ubPV51dXWn2jlx4kRs2LABq1atCgUWycnJGDx4MJ5++unQMW3R6XQA0GqJ3GgSBAEXXnhhqHzx0Tr7unRU40nvkVW2nE5ni+WFdTpdi6+VIAjNZgaWLl3aLHemI6/3jBkzIEkS/v3vfze5/MUXXwTHcd06ej9jxgz8+uuvWLduXegyp9OJN954A3l5eS3OLh5p9OjR6N+/P5577jk4HI5m1ze+15IkNVsympqaiszMTHi93tBlOp2u2XGEkPbRjAUhHfDOO++0WLf9tttu69L9Tpo0Cddffz2efPJJbN26FWeccQYUCgX27duHpUuXYsGCBbjoooswYcIEJCQkYPbs2bj11lvBcRzee++9Li9F+OCDD0KlJ1syY8YMiKKIxYsXt7gOulFjrsXWrVubXXfppZfijz/+wOOPP44tW7bgL3/5S2jn7W+//RYrVqwI1f9XKpX45JNPcNppp+Hkk09usvP2Bx98gM2bN+POO+/EZZdd1uxx+vbtiwkTJoSWZB0ZWDSW0z399NNb3czwnHPOwYIFC1BVVdVqzktXXXfddXj99ddx9dVXY9OmTcjLy8Mnn3yCX375BS+99FJo9HjmzJk46aSTcPfdd6O4uBhDhgzBZ5991uIJzyuvvIKTTz4Zw4cPx7XXXot+/fqhsrIS69atw6FDh5rsdxCuiRMn4vHHH0dpaWmTAOKUU07B66+/jry8PPTp06fN++jfvz/MZjNee+01GAwG6HQ6jB07ttVchkh76qmnsGrVKowdOxbXXnsthgwZgrq6OmzevBk//PAD6urqot6GM844Azk5Objmmmtw1113QRAEvPPOO0hJSUFJSUmTY0ePHo1XX30Vjz32GPLz85GamopTTz0VZ599NubPn485c+ZgwoQJ+OOPP7B48eJmeQEdeb1nzpyJKVOm4L777kNxcTFGjBiB7777Dl9++SVuv/32Jona0Xb33Xfjww8/xPTp03HrrbciMTER7777LoqKivDpp5+2u/kdz/N46623MH36dAwdOhRz5sxBVlYWDh8+jFWrVsFoNOKrr76C3W5Hnz59cNFFF2HEiBHQ6/X44YcfsHHjxiZLQEePHo0lS5Zg7ty5OOGEE6DX6zFz5sxovwyE9HwxqERFSI/TWBaytZ/S0tJWy83qdLpm99da+ck33niDjR49mmk0GmYwGNjw4cPZP/7xD1ZWVhY65pdffmHjxo1jGo2GZWZmsn/84x9s+fLlzUqITpo0iQ0dOjSs5zd8+HCWk5PT5jGTJ09mqampzO/3Nyk3e7QjX6vGcrNHWrFiBTv33HNZamoqE0WRpaSksJkzZzYpWdqoqqqKzZ07l+Xn5zOVSsXMZjObOnVqqMRsa1555RUGoEm5VMYY+/TTTxkA9vbbb7d629WrVzMAbMGCBa0ec3Q53Uatvd8tvReVlZVszpw5LDk5mSmVSjZ8+PAWS4TW1tayK6+8khmNRmYymdiVV17JtmzZ0mJJ0f3797OrrrqKpaenM4VCwbKystjZZ5/NPvnkk9Ax4ZabZSxYblgQBGYwGFggEAhd/v777zMA7Morr2zxuR5dLvXLL79kQ4YMYaIoNml3a5/Ro8uitiY3N5edddZZ7R5XWVnJbr75Zpadnc0UCgVLT09np512GnvjjTdCxzS+LkuXLm2xPeG+r621a9OmTWzs2LFMqVSynJwc9sILL7RYbraiooKdddZZzGAwMACh19Lj8bA777yTZWRkMI1Gw0466SS2bt26Dr3eLb2udrud3XHHHSwzM5MpFApWUFDAnn322SYlWxkLlpttqURrbm5uk1LKLWmrvzjS/v372UUXXcTMZjNTq9XsxBNPZP/73/+aHNPW+8QYY1u2bGEXXHABS0pKYiqViuXm5rJLLrmErVixgjEWLGd81113sREjRjC
DwcB0Oh0bMWIEW7hwYZP7cTgc7PLLL2dms5kBoNKzhISJYywGWVeEEEIIIYSQXoVyLAghhBBCCCFdRoEFIYQQQgghpMsosCCEEEIIIYR0GQUWhBBCCCGEkC6jwIIQQgghhBDSZRRYEEIIIYQQQrqMAgtCCCGEEEJIl1FgQQghhBBCCOkyCiwIIYQQQgghXUaBBSGEEEIIIaTLKLAghBBCCCGEdBkFFoQQQgghhJAuo8CCEEIIIYQQ0mUUWBBCCCGEEEK6jAILQgghhBBCSJdRYEEIIYQQQgjpMgosCCGEEEIIIV1GgQUhhBBCCCGkyyiwIIQQQgghhHQZBRaEEEIIIYSQLqPAghBCCCGEENJlFFgQQgghhBBCuowCC0LiwBVXXAG1Wo29e/c2u+6pp54Cx3H43//+BwBYsmQJrrjiChQUFIDjOEyePLmbW0sIIeRo4fbjtbW1ePbZZ3HKKacgJSUFZrMZ48aNw5IlS2LQakIii2OMsVg3gpBjXVVVFQYNGoSRI0di5cqVocuLioowdOhQzJgxA5988gkAYPLkydi0aRNOOOEEbN26FccddxxWr14do5YTQggBwu/H//e//+GCCy7AjBkzMGXKFIiiiE8//RSrVq3Cgw8+iEceeSSGz4KQrqHAgpA48eabb+K6667DokWLMHv2bADA9OnTsXbtWuzcuRNZWVkAgNLSUmRlZYHneQwbNgzJyckUWBBCSBwIpx8vKioCz/PIzc0N3Y4xhqlTp+KXX35BbW0tdDpdrJ4CIV1CS6EIiRN/+9vfcNJJJ2HevHmora3FRx99hG+//RaPPfZYKKgAgOzsbPA8/ekSQki8Cacf79u3b5OgAgA4jsN5550Hr9eLAwcOxKLphESEGOsGEEKCOI7D66+/jlGjRuHGG2/ETz/9hDFjxuDmm2+OddMIIYSEoSv9eEVFBQAgOTk52s0kJGoosCAkjgwdOhTz5s3Dk08+CUEQsGzZMpqdIISQHqQz/XhdXR3eeustTJw4ERkZGd3UUkIij85YCIkzjaNVmZmZGDZsWIxbQwghpKM60o/LsoxZs2bBYrHg5Zdf7o7mERI1FFgQEkdKS0vx0EMPYdiwYSgtLcUzzzwT6yYRQgjpgI7243//+9/x7bff4q233sKIESO6qZWERAcFFoTEkVtuuQUA8M033+Diiy/G448/Tol8hBDSg3SkH3/kkUewcOFCPPXUU7jyyiu7s5mERAUFFoTEic8//xz//e9/8eijj6JPnz546aWXoFQqKXmbEEJ6iI7046+88goefvhh3H777fjnP/8Zg9YSEnm0jwUhccBut2PIkCFISUnBxo0bIQgCAOBf//oXbrvtNnz88ce4+OKLm92O9rEghJD40JF+fMmSJbj88svxl7/8Be+99x44jotl0wmJGAosCIkDt912G/79739j/fr1OOGEE0KXS5KEE088ERUVFdi9ezcMBgN+/PFH/PjjjwCAl19+GVqtFtdccw0A4JRTTsEpp5wSk+dACCHHsnD78V27dmHixIkwmUx4+umnoVAomtzPhAkT0K9fv+5uPiERQYEFITG2adMmjB07FjfeeGOLFUE2btyIcePG4ZZbbsGCBQvw8MMP45FHHmnxvh566CE8/PDDUW4xIYSQI3WkHx81ahTmzJnT6n395z//wdVXXx3F1hISPRRYEEIIIYQQQrqMkrcJIYQQQgghXUaBBSGEEEIIIaTLKLAghBBCCCGEdBkFFoQQQgghhJAuo8CCEEIIIYQQ0mVirBvQ3WRZRllZGQwGA21IQwgh3YAxBrvdjszMTPB818ezqB8nhJDu05E+/JgLLMrKypCdnR3rZhBCyDGntLQUffr06fL9UD9OCCHdL5w+/JgLLAwGA4Dgi2M0GmPcGkII6f1sNhuys7ND/W9XUT9OCCHdpyN9+DEXWDROmxuNRvpCIoSQbhSpZUvUjxNCSPcLpw+n5G1CCCGEEEJIl1FgQQghhBBCCOkyCiwIIYQ
QQgghXXbM5VgQQo5tkiTB7/fHuhm9ikKhgCAIsW4GIeQYQH145EWyD49pYPHjjz/i2WefxaZNm1BeXo7PP/8c5513Xpu3Wb16NebOnYsdO3YgOzsb999/P66++upuaS8hpOdijKGiogIWiyXWTemVzGYz0tPTaV8JQkhUUB8eXZHqw2MaWDidTowYMQJ//etfccEFF7R7fFFREc466yzccMMNWLx4MVasWIG//e1vyMjIwLRp07qhxYSQnqrxCyk1NRVarZZOgCOEMQaXy4WqqioAQEZGRoxbRAjpjagPj45I9+ExDSymT5+O6dOnh338a6+9hr59++L5558HAAwePBg///wzXnzxxVYDC6/XC6/XG/rdZrN1rdGEkB5HkqTQF1JSUlKsm9PraDQaAEBVVRVSU1MjviyK+nFCjm3Uh0dXJPvwHpW8vW7dOkydOrXJZdOmTcO6detavc2TTz4Jk8kU+qHdWgk59jSux9VqtTFuSe/V+NpGY+0z9eOEHNuoD4++SPXhPSqwqKioQFpaWpPL0tLSYLPZ4Ha7W7zNPffcA6vVGvopLS3tjqYSQuIQTZ1HTzRfW+rHCSEA9eHRFKnXttdXhVKpVFCpVLFuBiGEkE6ifpwQQnqGHjVjkZ6ejsrKyiaXVVZWwmg0htaHEUIIIYQQQrpfjwosxo8fjxUrVjS57Pvvv8f48eNj1CJCCImd4uJicByHrVu3hn2bRYsWwWw2R61NpHdzeAMoqXVBllmsm0JIr9Db+vGYBhYOhwNbt24NvZhFRUXYunUrSkpKAATX1V511VWh42+44QYcOHAA//jHP7B7924sXLgQH3/8Me64445YNJ8QQgg5Zrh9EnaX21BYbYfNQxuUEUKai2lg8dtvv2HUqFEYNWoUAGDu3LkYNWoUHnzwQQBAeXl5KMgAgL59+2LZsmX4/vvvMWLECDz//PN46623aA8LQgghJIq8AQl7K+2ocXjhC8iodfhi3SRCSByKaWAxefJkMMaa/SxatAhAcKpn9erVzW6zZcsWeL1e7N+/n3bdJoR0jdPZ+o/HE/6xR1ema+mYTvj2229x8sknw2w2IykpCWeffTb279/f4rGrV68Gx3FYtmwZjjvuOKjVaowbNw7bt29vduzy5csxePBg6PV6nHnmmSgvLw9dt3HjRpx++ulITk6GyWTCpEmTsHnz5k61n/R8fknGvkoHyq1upBs10KsUqLR54AvIsW4aId3bh1M/3q4elWNBCCERp9e3/nPhhU2PTU1t/dijN/vMy2t+TCc4nU7MnTsXv/32G1asWAGe53H++edDlls/qbvrrrvw/PPPY+PGjUhJScHMmTOb1CZ3uVx47rnn8N577+HHH39ESUkJ5s2bF7rebrdj9uzZ+Pnnn7F+/XoUFBRgxowZsNvtnXoO8abc6kZpnQuMUZ5AeySZYX+VA4fqXUg1qCHwHPQqEXaPHxYXzVqQONCdfTj14+3q9eVmCSGkJ7vwqC/Gd955BykpKdi5cyf0rXzJPfTQQzj99NMBAO+++y769OmDzz//HJdccgmA4AZIr732Gvr37w8AuOWWWzB//vzQ7U899dQm9/fGG2/AbDZjzZo1OPvssyP23GLF6Q2guCYYWGQnaqk2fitkmaGoxoHiWheS9SoohOBYpMBzEHgeVXYvUo3qGLeSkPh3LPXjFFgQQo5tDkfr1wlC09+rqlo/lj9qAri4uNNNOtK+ffvw4IMPYsOGDaipqQmNcJWUlGDIkCEt3ubISnmJiYkYOHAgdu3aFbpMq9WGvowAICMjA1VHPLfKykrcf//9WL16NaqqqiBJElwuV5Oct57O7ZOwtzL43lNw0RxjDKX1LhyodiJRq4RKbPq3YFQrUOPwwuENQK+iUwkSQ3HehwPHVj9OvQEh5Nim08X+2DbMnDkTubm5ePPNN5GZmQlZljFs2DD4fJ1fhqJQKJr8znFck2VBs2fPRm1tLRYsWIDc3FyoVCqMHz++S48Zb9R
KATqlSMFFK8qsHuyrdMCgVkCjFJpdr1EKqHN5Ue/0UWBBYivO+3Dg2OrHqTcghJA4VVtbiz179uDNN9/ExIkTAQA///xzu7dbv349cnJyAAD19fXYu3cvBg8eHPbj/vLLL1i4cCFmzJgBACgtLUVNTU0nnkF806uDX4EUXDRVZfNgT4UNaoXQZtCgUYiosHmQadZA4Ol1I6Qlx1o/ToEFIYTEqYSEBCQlJeGNN95ARkYGSkpKcPfdd7d7u/nz5yMpKQlpaWm47777kJycjPPOOy/sxy0oKMB7772HMWPGwGaz4a677oJGo+nCM4lfFFw0VevwYleFDSLHw6RRtHmsQS2izumD1e1Hok7ZTS0kJD41zhYc3X8ca/04VYUihJA4xfM8PvroI2zatAnDhg3DHXfcgWeffbbd2z311FO47bbbMHr0aFRUVOCrr76CUhn+id/bb7+N+vp6HH/88bjyyitx6623IjU1tStPJa7p1WJoWdSh+mO3WpTV5cfuCjskCUgII1BQCDwkxlDr8HZD6wiJPcYYZJkhIMnwSzJ8AQkevwSXLwCnV4LTJ8HtC8AbkOCXZAQkGQCHDz/88Jjpxzl2jPWgNpsNJpMJVqsVRqMx1s0hhHQDj8eDoqIi9O3bF2p1761is3r1akyZMgX19fUwm83d+thtvcaR7ne7en+FVXYU1biQflRFI4cnAKcvgIHpevRJOLZmLhzeALYftsLhCSCtA5We7B4/ZABjchOgVjTPxSAkErqzD5cZA2No2FsNkMEgy42XMzAgeH3D8VzDDxq6iz+vY+DAgeMADhx4HuA5ruEnOLPBccHLjhSrfjxSfTgthSKEEEIQnLlgYNhT4QDAoU+C5pgILtw+CbvLbbC6/cjoYPlYnUpEudUDi8uPdBMFFiT+NQYMDAwyQyiIkFnw91BgccRtOODPAIEL/h/tdg0c0HA/jDFIEuA/4l4b75PnOAj8n8GG1FAxqqeO+1NgQQghhDQwqIN5BXsqgptI9fbgwhuQsLfSjmqHFxnGjj9XnuOgEDhU2jxIM6p69WtFeo7QbENohuHPoEGWg0FFm8ED3xA8dBXXeL/BOzsy9GYNQY3MGKQAC7XF4w8GFk6fBLUvAJ7nwDfOfBwx2xGvKLAghJBeYvLkyT12lCueHCvBhV+Ssa/SgXKrGxnGzld2MqoVqHf6YPcGYFS3nfBNSKQ0WbKEP4MHSW5/ydKfJ+ixan1DEHPkGqoGUyZPht3jB2MMfomBSQ1J4Y23QcOSKj64rCo469H4nGLfT1Fg0QlVNg+MGgWtJyWEkF6qtwcXksywvyqYrJ5qUHepXKxaIaDWGdzTggILEi0yY/AFJDBfIDT7EFpqhMaTb67JyXd4S5biDNdQWamF/qYxkJJkhoDMGp4312RZVXBp1RE5HEfMdnQHCiw6SJYZimqdUAo8BqUbW9w4iBASnxp3OyWR1xtf26ODi+xEbSybEzGMMRTVOFBc60KyXgWF0PUCkVplMNciy6yBGIH7I+RIHn+wCpPHFwAEZWjJEse3POrfWzUPEIL/b7KsSmbwSU0DDo7jIHCAShTAtzKIEKk+nAKLTpAloNTqgiQzDM4wQke7jhIS15RKJXieR1lZGVJSUqBUKnvV6HMsMcbg8/lQXV0Nnuc7VA6xJ+htwQVjDCV1LuyvdiJRq4RKjMzgmFGtQLXDA4vbj2S9KiL3SQgQzAMqqvPAHwBqqyuRlJwMhaJ39TPR1LhMjDFAUgoQ+aaBf6T7cDoj7qREnQq1Dh92lFkxOMMY+vIhhMQfnufRt29flJeXo6ysLNbN6ZW0Wi1ycnLA871vtLo3BRdlVg/2VTpgVCsiOuPeuJSqxu6lwIJETGMeUKXdizRzOjzuepSXlce6WT2SzBiUIt9qHkak+nAKLDqJ54B0kxqVNg92lNkwON0Ik5aCC0LilVKpRE5ODgKBACRJinVzehVBECCKYq+eBeoNwUWVzYO9FTaoFQL0UZhp16sUqHZ
4keeXKAeRdJkkMxRW2UN5QKLAg4kpAJODS0dI2CSZod7lw/BMU4sD4ZHswymw6AKe45BuVKPK7g3NXISzWykhJDY4joNCoYBCQYMApOOODC44DuiT0HOCi1qHF7sqbBA4HiZNdD7/OqWAMqsPdU4fMs2aqDwGOTbIcjAP6GCtu0keEMdxACcAPAWuHcHJDDLPoFSpoY7yCpveN2fdzTiOQ6pBBbdPwo4yK2oc3lg3iRBCSJQY1ApolQJ2lwdHUnsCq9uP3RV2SBKiOvjFcRxUooBKm4fKHpNOY4yhtN6FAxHOAyLdgwKLCOA4DqlGNfwSw44yK6psnlg3iRBCSJQY1ApoFD0juHB4A9hVboPbJyHFEP3cB4NaRL3LB5s7EPXHIr1TYx6QIcJ5QKR7UGARQcl6FcA47CyzodzqjnVzCCGERIlRE//BhdsnYXe5DVa3H6ndEFQAwXKW/gBDnZNm70nHVdk82BPFPCASfRRYRFiiTgmB57Cr3IbDFgouCCGkt4rn4MIbkLC30o5qhxdpBnW3JtbrVCIqrF74pd63twmJnsY8IDGKeUAk+iiwiAKzVgmlIGBXmQ0ltU5aa0oIIb1UPAYXjSU6y61uZBg1XdpV+2gCz0El8m3ep14lwu71o97li9jjkt7N6uqePCASfTTPFCUmjQICx2FPpR0yY8hJ1LW62yEhhJCey9gwurq7oRRtLKtFSTLD/ipHqERnpIIKlcjDrFPArFHA6vbDpFHA4vLD4vLDG2g6MyHwHDiOQ43di1SDOiKPT3ovhzeAXRXBPKA0I31eejoKLKJIrxbBccDeSgcCMkO/ZP0xH1w4vMGEPlo7SQjpTYwaBeCObXDBWLBEZ3Gtq0mJzq5SiTz6JGrw2pr9WLS2GDZ3AEaNiDkT+uL6Sf1wqM7dLLgwqkVUO7xw+QLQKqm/Jy07Mg8og4KKXoH+2qNMpwoGF/urnZBloH+qPqLT0j1FQJJRbnWjqMYFo1rEiGxzr95MixBy7DkyuOA4DlnduJcDYwwlddEp0WnWKfDamv3414rC0GU2dwALVuwDAMwam4NKW9NkbY1CQL3LjzqnjwIL0qIj84AyjBo6J+glKMeiG2iVIhK1ShTVOrC30obAMZbQZnH5sP2wFTvK7JBlBovLH5q5IISQ3sSoUUAjCt1ewCNaJToFnoNZo8CitcUtXv+ftUUwaxXNBsw4joNa5FFh9UCWKc+QNBXNPCASWzSM0E3UCgFJOhUO1rrAGJCfaoBS7N1xnS8go7TOhdI6FwIyQ5pBBVHgcdjigtXtb3FbeUII6emMGgWYG9hVbgOAqM9cVNk82BulEp0iz8Hq9re6L4XNHYDVHYDIc5COCiCMGgXqXD7YPH6YtZSQS4KilQdE4kPvPrONMypRQKpBjZI6N3ZX2OANSLFuUlQwxlDj8GLbIQv2VTmgVghIM6ohNqz3VdPOrISQXs6kUUDdDTMXdU4fdlfYwUepRGdAZjBpFDBqWg5YjBoRJo2IQAuzEgqBhyQFvw8IAaKXB0TiR8zf0VdeeQV5eXlQq9UYO3Ysfv311zaPf+mllzBw4EBoNBpkZ2fjjjvugMfTc3a6Vgg8Ug0qlFnc2F1uh8ffu4ILj1/CvkoHtpZaYHcHkG5UQ3fUCJpBHawqYvPQcihCSPdTCjwUQvRHSaMdXFjdfuwqtyEgMSRGqUSnJDPsr3Zi9vi8Fq+fM6EvLC5/s9mKRnqViEqbt9cOpJHwHZkHlKBVRDQPiMSPmAYWS5Yswdy5c/HQQw9h8+bNGDFiBKZNm4aqqqoWj//ggw9w991346GHHsKuXbvw9ttvY8mSJbj33nu7ueVdoxB4pBs1KLd6sKvcBpev559gyzJDhdWDrSUWHKhxwKhSIMWganGKUyny8AVkWKjGOSGkG7l9AfgCMjRKEeP7JyHNqIIqyktSoxVcOLwB7CoPluhM1kdvmVGZxY07l27F1RPycOtp+aGZC6NGxK2n5eP6Sf1gcflbvb1
eLcLu8bd5DDk2HJkHRAn9vVdM39kXXngB1157LebMmQMAeO2117Bs2TK88847uPvuu5sdv3btWpx00km4/PLLAQB5eXn4y1/+gg0bNrT6GF6vF17vn9OwNpstws+icwSeQ7pRjQqbG5LMMCjD2GNLsDq8ARysdaLM4oZSEJBpar+6g1YhotLmQXaC9pgvwUsIaVsk+nGvX8Jraw7gP2uLwiqXGkmmhmpRkcq58Pgl7KmwweryI8MUvV21JZnhhe/3Yk+lHfd/sR2PnDMUt0zJR73LD4NaxPr9tThY42pxGVQjnuOg4HlU2jy0R8ExLJp5QCS+xGzGwufzYdOmTZg6deqfjeF5TJ06FevWrWvxNhMmTMCmTZtCy6UOHDiAr7/+GjNmzGj1cZ588kmYTKbQT3Z2dmSfSBcIPIcMkwa1Dh92lllh8/SsER1JZjhU78LWknocqnchUatCok4Z1pecXi3C5gn0uOdMCOl+Xe3H3b4AFq7ejwUr9oWSkBvLpb6+5gDM2ugXkmicudhd0bWZC29Awp4KO6rtPqQZoxdUAMAnm0qxp9IOrVLARaP7oNLmRWGVE7UOL8586Uf89d3fsHZ/bbv3Y1ArUOf0UTXAY1S084BIfIlZYFFTUwNJkpCWltbk8rS0NFRUVLR4m8svvxzz58/HySefDIVCgf79+2Py5MltLoW65557YLVaQz+lpaURfR5dxXMc0k1qWN1+7CwLjkD1BFaXv6GErA0Ah0yTtkNVrhQCj4Ako95Jy6EIIW3raj8u8Dz+s7aoxetaK5caDSaNAiqh88FF4IgSnenG6FbT2Vdpx4cbg6/zDZP6h3bQlmQGSQaOz0kAAHy3s+Xv6yNplAI8AYn6+2NQYx6QX5KjlgdE4kvMk7c7YvXq1XjiiSewcOFCbN68GZ999hmWLVuGRx99tNXbqFQqGI3GJj/xhuc4pBnUcHgC2F5mRV0cd75+SUZxjQNbSutRZfcgVa/q9AiETimi0u495vb1IIR0TFf7cbsnvHKp3aGzwYUkMxR2U4lOj1/C89/vhSQznJyfjMkDUpodc8aQdADAxuI61IZR9Ukriii3ultN8ia9j/OIPKAUvSrWzSHdJGaBRXJyMgRBQGVlZZPLKysrkZ6e3uJtHnjgAVx55ZX429/+huHDh+P888/HE088gSeffBKy3LNPTjmOQ5pRDY9fwo4ya1yW56t1eLGt1ILdFXaoBAHpRk2ohGxn6FUi7O4AVYcihESVQd25cqnRcmRwURZGcNHdJToXrS3GYYsbiTolbprcv8XlVtmJWgzJMEJmwA+7Wy64ciSDWmwI4nrGrDzpGo9fwu6GPKBUg4p21T6GxCywUCqVGD16NFasWBG6TJZlrFixAuPHj2/xNi6XCzzftMmCECxX1lv2REg1qCFJDDvKrKiyxUcZ3WAJWTu2HrLA6vIj3aiBXt315CtR4CEzhjpn/AVRhJDeQ5JlzJnQt8XrZo/PQ5XN2+0j6Y3Bxa4wgovuLNG56WA9lv1RDgC4/bSCNjcynTY0uJT5+50VkNv5DhYFHhJjqLFTf9/bdWceEIk/MV0KNXfuXLz55pt49913sWvXLtx4441wOp2hKlFXXXUV7rnnntDxM2fOxKuvvoqPPvoIRUVF+P777/HAAw9g5syZoQCjN0jSq8AxDjvLbCi3Rm9jpfYwxlBl82BrqQX7qx3QK0WkRnhdr04losrmhZ+WQxFCokSjFHHT5P647bSCJuVS/35qPq6ekIcHvtwek30WwgkuyizubivRaXP78a8V+wAAZx+XgVENeRStmdA/GTqlgEpbcDa7PUaVAlV2T6/bv4n86cg8oLRWSs6T3i2mNb8uvfRSVFdX48EHH0RFRQVGjhyJb7/9NpTQXVJS0mSG4v777wfHcbj//vtx+PBhpKSkYObMmXj88cdj9RSiJkGnhMXlw85yGySZIcvcfgnXSHI2lJA9bPFA0VC9io/C4+tVIqrsHljdfiTTGkxCSJS
oFAKun9QPN0/Jh83th0EjotrmxZxFG/H7ISs4cLh9akG3j66aNApY3cCuimAp2swjStFW2TzY000lOhljeGV1IepcPvRJ0ODqCXnt3katEDB5YCqW/VGO5Tsr2w1EdCoB5TY/6l0+ZJi6VnKXxJ/GzRQPN+QBdWWpNOm5ONZb1hCFyWazwWQywWq1diqRW5YZ/jhkhTcgQdMNG7zY3H64/RIK0vTISdRG/UtPkhkqbB4U1Tjh8ARP9qM99V5hcyM3UYcB6YaoPg4hJDa62u9G+v5Kap04UONEkk6FbYcsePDL7ZAZcP0p/XD2cZldbl9nWN1+eCUJg9ONyDRrUOf0YfthKxhDt1TTWbm7Ci/+sBcCz+G5i0YgP1Uf1u2Kahy49aOtEHkOi+ac2G4xj2q7F0l6JY7rY6IlMr0IYwwHqp0orHYgUauEWtF7VpH0BpLMUOPw4oS+iZ0quNORPpfCyQ5w+wIIyAwZZjVG5Ji7ZddWo0YBnVLE3ko7imuckKO4Dtjm8WN7mRXbD1nAZIZMkybqQQUQrA5V5fDGZCkCIeTY45Nk+KVgXzqijzk0Ov/Wz0XYUWaNSZuOXBZVXOPs1hKdVTYPXv9xPwDgLyfmhB1UAEDfZD0KUvUIyAwrdlW2e7xBLaLO6YWd9rToVUrrXNhf7YBZo6Cg4hhHgUWYGndtHfP49zjxiRU44fEfsHhDCfokaqIeXOjVIoxqBfZVOXCgxhHxJEO/JONgrRNbSupRafUgxaCGWRveRneRoFOJcHr8VC2EEBIT543MwikFyZBkhqe+2R1W+dRoaAwu9lTYu61EpyQzvPDDXrh8EganG3DR8X06fB/ThgYrOX63s7LdQipqhQCvX0ZdHFY+JJ1TZnFjb6UDBlX084BI5wg8B51KRHec1VFgEYZ42LVVqxRh1iixv9qJ/VWOiO39UOf04fdDFuwqt0HB88gwaaJeyvBoPMeB5/m4LLFLCOn9OI7D308tQF6SFha3H09+sztmBSVMGgWS9apuK9H55dbD2FFmg0Yh4I7TB3Qq2XZiQTLUCh6HLe6GTVPbplOJqLDRHka9QZXdg70V9mAeUASqRZLIUok80kwqDEzXo3+KDmqFAJcvurOFFFiEIV52bdUoBSRqlSiqdWBflb1LX3zegITCKju2ltbD0lBCtq2ygtFmUImodfioWgghJCbUCgH3zhgMvUrEnko7Xl+zP2ZtUYp8twQVRTUOvLf+IADgbxP7djqhWqsUcUpBcBO95WHsxG1QK2D3+GGhWeoerc7pw54KOwB0eqPc3kLgOahEPq6qYKlEHn0SNXh//UGMefwHnPjECox5/Hu8vuYAvFE816LAIgzxtGurWiEgWafGwVoX9lTY4Qt0LLhgjKHK7sG2UgsKqxzQKcWo7+IaDq1SgNMrweKiLxpCSGxkmDSYd8ZAcACW76zEt9vbP0nuqXwBGc9/txcBmWFs30ScPjitS/fXuBxqbWEtHO1setr4fVNlj4+9mkjHWd1+7C63wReQkXQMV3RsnBHIT9UhQadAfqquW/Jv2yLJDC5fADqViNfW7Me/VhQ2W22zcPX+qM1c0LxVGBp3bW0puGjctbW6Gzf9UYo8Ug1qlNa7IDOGAWmGsJKl3D4JxbUOHK73QIhiCdnO4DgOCp5DtcODdJM61s0hhByjRucm4Mpxufi/9Qfx+o/7kZekxaCMrleyijfvrT+Ig3UumDUK3DIlv8szJAWpeuQlaVFc68KqPVWYOaLt6loGlQI1dh/cSRI0Skr27Umc3gD2VNjg9AWQZjh2v68bZwReW7Mfi9YWw+YOwKgRMWdCX1w/qR8O1bnhDWPw1y/JcPskuP0SPH4p9P8jf3f5JXj8Mty+ADx+ueF3qcXbeQPBog8//3MKFq0tbvEx/7O2CDdPyY/wKxJEgUUYGndtXdCwcdCRrp6QB4vL3+27tioEHukGNcosbsgyw6AMY6vBhSwzVNo9OFDthN0TQJIuPkvB6dUi6pw
+uHwBSgAjhMTMRaP7YF+VA+sO1OLJb3fjpUtGIqEbqjN1l98PWfDl1sMAgL+fWgCztuvPjeM4TBuajtd/PIDvdlbg7OMy2gxWtEoBVqsP9S4fNEra06Kn8Pgl7K6wod7pR7rp2N5V26xThGYEGjXOCDAwnDooFW/8WAS3L9Bw8i83DRoa/h+Iwvljil6FWoevzdU2do8/KrNNdPYWhsZdW4FglNcYlc4en4fZ4/Ow+WA90mOw2Y8o8Eg3alBh80BmNgzKMDQ7Ibd7/CiqcaLC6oFaFJAZxx2BRiHA4vLB4vJTYEEIiRmOC26Wd2ipC6X1bjz17W48dt6wbi9sEQ0ObwAv/rAPDMHlSyf2TYzYfU8ekIr//FKM4loX9lY6MLCNvYk4joNKFFBudSPdqAYfR2vTo8nq8oOBQaMUuqWceyT5AjL2VNhRbfMiPY5WPMSCwHMwaRStzggsWluMGyb1x/oDtahz+sK6T6XAQ6MUoFEIUCt4aJQiNAq+4XchdJ2m4f/qI/7feLm64f8GlYg0o6rN1TbRyquls7cwHblrq9Xth1EjYvPBelzy+npY3T68cMlIJERg1KejBJ5DulGNSrsbcllw5kKvEhGQZJRZ3CiudQXLFhpUcf+lyHEcFIKAarsXGXEcABFCej+tUsR9M4Zg7tKt2Fluw9s/F+GGSf1j3awue/3H/ahxBPvYa07qG9H71qtFnJSfhFV7qrF8Z0WbgQUQXGZscftg9wRg6obqirFm8/jxR5kVHp8EtYKHTiXCrFFApxKhUQrQKsWY5zu2JiDJKKyyo9zqRpox9nmZsSAzhsIqB9YfqEWlzYPHzx/eTv6tH1eOy4XN7T8iYGg9QIj0a2px+1tdbTNnQl8EZBnKKKRaU2DRAVqlCFlmKKt3o6haAgcOHr+EGocPT30TuxGtYHARnLnYftiKvCQdyq1uVNo8MKgUyDT3nGlmg1pEvcsHp0+CXkUfT0JI7GQlaHDn6QPw6LJdWPZHOfJT9ZjaxSTnWPppXzVW76kGzwFzpw6ISm7DtKHpWLWnGj/tq8bfTu7b5uyzUuThD8iocXh7fWDhl2Tsr3LA5Q0g1aCGxy/B6vKjqiE/UylwUIsijBoxuDGuSoRWKUDVTRXC2iLLwV21S+pcSDWoIcb5IGUkBSQZ28tsWHegFhsO1KK2YfYhUadEkl7Z5oxAkk6JaUPTu32pfCOL04/rJ/UD0HS1zZwJfXHT5P5QRWlJPJ25dYLLJ4GBwaBW4L6zBmPe0m3YWW7Dmz8dwE2To5MM0x6e45BhVKPSHgwuACCtB3YAaoWAOqcPFpePAgtCSMyd2DcJfzkhGx9uLMXC1YXITdSiIK3tkfh4VOvwYuHqYAndi8dkRy0hfUiGEX0SNDhU78aPe2tw5rD0No/XqxSotHmQk6SN+1n1zmKM4WCtE5U2D9KNmtBmZbojvuN8ARkev4QKmweHLG7wXDDw0ipFJGgV0KsUDbMaQre+TowxFNU4caDGgSRd/K98iAS3T8LmknqsL6rFxuI6OL1/lmbVKASMzk3AuH5JsLranhGIRf7tkbwBGYfq3Jg1Ngc3T+kPi9sPs0aJgCxHLagAKLDosuwELe48fSAeW7YT32yvQL9kfbsdabRwHIc0gxp+iUEZw1JnXaUSeVTaPMgya2I+UkMIIZedmIPCagc2FtfjiW9246VLR/aouv0yY3hpxT44vAHkp+px2ZjsqD0Wx3E4Y0ga3vmlGMt3VoQRWIiosntQ7/IhtZdWGKq2e1Fc40KCVtnqchelyEMp8jAi+LmSGYO3IdnX4vRDZjIEgYe6Yf18glYZCjQ0CiFqOSqldS4cqHEgQRufRV8ixer249eiWqw/UIetpRb4jtgnzKRRYGzfRIzvl4Tj+phD51d2T6DVGYHGqlCx5g3IqLR5UWnzwuENYGimAGOU+y4KLCLgxL6JmDUuF+83lCfMSdJiSIzKE3IcB6XYs0/G9WoRVrcfdm8Axhhu2kcIIUB
wRvjO0wdi7sdbUWb14Olvd+PRc4f1mHXmy34vx9ZSC5QijztPHxD1mexTB6Xh/9YdRGGVAweqHeiXom/1WIHnwHEcqmzeXhlYOL0BFFY5IPJch4qS8BwXXIt/xHK1gCTDE5BR5/ShwuYGGKASgwm7Jo0CRo0C2iPW7XdVudWNvVUO6JWKXllQpdLmwfoDtVh3oBa7ym04cnIh3ajGuH5JGNcvEYPSjS3+rR89I2B1B2DSiLC4/GGXmu0ukszg9AbQHfMnve+TEiOXjO6DomoHftlfiye/2YUXLxmJ5GN405iuUIkCfAEvrC4/BRaEkLigU4m476whmLd0G/44bMWitUW45uR+sW5Wu0rqXKHKNX+dkIc+CdqoP6ZJo8C4fkn4ubAGy3dW4sZJrQcWAGBUi6h1eOH0BposD+rpApKM/dUO2L1+ZBi7nusoCjz0Ah9aJswYgzcgwxuQcbjejYN1TgjgoVLw0CoFJGiV0KsbEsMVQocCymq7F3sq7FAJPPTq3vGeMMZQXOvC+gO1WH+gFgdqnE2u75eiw7i+SRjfLwm5SdqwVkw0zgjUOHwQeQ7Vdm9Mlz/Fg97xaYkDHMfhttMG4LBlG4prXXji61146oLjevSSpFjSKERUNCyHOlbKEBJC4ltOoha3Ty3Ak9/sxhdby9A/RY/JA1Nj3axW+SUZL3y/Bz5JxvE5ZswYntFtjz1taDp+LqzBmj1VmDMhr80RdK1SRL3Lj3qXr1cFFofqXSizuJFqiE6VQ47joG6oNNS4NE+SGbwBCU6vhDqnAwwMosBDLQowqJsvoWqpXfVOH3ZX2AAGmHv4/i2SzLC7wtYQTNShwvbnbu88F8wJGt8/CWP7JiHN2PkZM0lmx3xA0aj3/AXHAY1SCJYn/Hgr9lU58MqqQtw+tYDyBDrBoBZR5zp2yhASQnqGCf2TcfHoPli66RBeXlWI3CQt+ia3PSIfKx/+WoL91U4YVCJuPbV7v4uO62NCmlGFSpsXvxTW4LR2qmmpRR7lVg8yTb1jMKnW4UVRjRMmjbJbE56FhiVXR1a/90vBxPAauw/lFg/ABXMZNQoBZq0CBrUiGGgoBXgb9qrw+WWkduFEOxoEnoPIcwi0cxLvC8jYdsiCdQdq8WtRHaxuf+g6pcBjVI4Z4/om4YS+iT0qV6qnoMAiwtJNavzjzEF46L/bsXJPFfqn6nDOiKxYN6vHUQg8JImh3tX7yxASQnqWWWNzsb/agc0lFjz+dXDpa7Q2m+qsneU2fLr5EADg5in5Udlhty08x+GMIel4b/1BLN9Z2W5gYdQoUOf0wer29/hdzj1+CfuqHJAZ4qK6oULgoRB4NKawNC6h8vgllNS5IcsuCDwHtSJY2tblCyAtjvJdVCIPs04Bs0YBq9sPk0YBi8sPi8sfymNwegPYWFyH9UV12HywHm7/n5WcdCoBJ+QFk6+Pz0no1Uno8SD2n/heaGS2GXNO6ou3fy7C2z8XITdRhxHZ5lg3q8fRKgVU2LzITtT1mCRJQkjvJ/Ac5p0xEHM/3oYKmwfPLt+Dh2YOjZt+yuUL4MXv90JmwKkDU3FSfnJM2jF1cBoWbziIXeU2lNS5kJPYen6HQuAhyQw1Dm+PDiyCez44YHH6kBGne0gduYSqkSQzePwSAjKL2tKtzlCJPPokavDamv1YtLa4SeWl607phw9/LcGy38vxx2ErAkfMYiTplA3J10kYlmnscaX3ezJ6paPk3BGZmDIwBTIDnv52d5N1fSQ8epUIhyfQZBqTEELigUGtwL0zBkMl8thSasH76w/Gukkhb/1chAqbB6kGFa47JXYJ5ok6JU7ISwQAfLejot3jDWoRlXYvvAGp3WPjVZnVjdI6F1IMavBxcnIejsa9NUwaRVy126xT4LU1+/GvFYWhjehs7gAWrNiH19bsR06iFltKLQjIDNkJGlw8ug+ev3gE3rn6BNwwqT9GZpspqOhm9GpHCcdxuHlKPvJT9bB7A3h82U54/D2
3s4wFsWEEq75hp0tCCIknfZN1uPXUAgDAJ5sP4ZfCmhi3CFh/oBbf76wEB+D2qQMingztDUiosnkgs/ASVacNDe5jsXJPFfxS2+U3dSoRTm8AFlfPHEyyuHzYX+WAXqWgwi0RIPAczBpFqKrZ0d5dV4yJBSm4cVJ/vDrreCycNRpXjc/DgDRDXAVHxxr65EeRShRw7/TBMGsUKK514aUV+8DC7IxJkE4losrmbfcLiRBCYuGUASk4b2Qwj+6lFXtxsNbZzi2ip97lw79XFQIAzh+VheFZpog/Ro3DC1Hk4PAEwjr++JwEJOuVsHsCWLe/ts1jeY6DyHGosHp63HelNyChsMoBv8SivgHZsULkOVjd/tBMxdFs7gAc3gDOH5XVLWWUSXgosIiyFIMKd08fBJHn8EthDT7ZdCjWTepR9CoRDp+flkMRQuLW1RPycFwfEzx+GU98vQsOb3gn3ZHEGMPLK/fB6vYjL0mLK8blRvwxbG4/dCoRqXo17GE+R4HnMLUhcXv5zvaXQxk1CtQ7fTF5DTuLMYaDNS7UOLy0f1UEBWTWsPFfy7NuRo0Ik0ZskltBYo8Ci24wNNMUWuf63vqD2FhcF+MW9RwCz4Ex0HIoQkjcEngO/5g2CCkGFcqsHjz/3Z6wlwpFyvIdldhYXA+RD+4SHukSp5LMYPf6kZuoRVaiBiqRC3t57+mD08AB+P2QFeVWd5vHqhUCPAGpR/X5FTYPDtY5kaRTxU0Cf28gyQy1Dh9mj89r8fo5E/rC4vLT/hFxhgKLbjJ9WAamDU0HA/Dcd3twqN4V6yb1GHqViCq7F74ALYcihMQnk0aBe6cPhkLg8NvBenz4a0m3PXaZxY23fj4AAJg9Pg95ybqIP0ad04ckvQoZZg0MKhEpBjUsYc4kpxrVGJWTAAD4bkdlu8drFSLKrJ4eccJo9/ixv8oBlShQGdMoeGVVIa6ekIe/n5ofmrkwakTcdloBrp/Ur8fm4/RmFFh0o+tP6YfBGUa4fBIeW7YLzh401RtLoYQ+d88ZwSKEHHvyU/W4ZUo+AOCjjaXYUNR2TkEkSDLDC9/vhTcg47gsE84ZmRnxx/AGJEiMITdJC4UQ3OsguEsxQyDM/LdpQ4PLoX7YXdnubQxqEXZPABZXfPf5fknG/ioHnD4JCdqeWyI3Xm07ZMH7G0pw6evrccHxffDbfVOx8b6p+O2+qZg1NgeH6tyhfSxI/KDAohspBB73nDkISTolDlvceP777p8u74l4jgPPcahxeGPdFEIIadOpg9Jw9vAMAMDz3+2N+uz00k2l2FNph04p4LapBVGphlPr8CHTrEbKEfkDiTolTBoFbGEmcZ+YlwizNrixWXvLgUWBB2Ms7vv8kloXKmyeuNpMrrfwBiS80lCIYEC6AQ5PAIVVTtQ7fSiscqLS5qWgIk5RYNHNEnRK3DcjOF2+sbgeH2zovunyeCDwHFQi3+F1qDqViFqHj0r2EkLi3jUn98XQTCPcfgmPf70LLl90Zqf3VtpDS65umNQfqVE4wXV4AlAreeQkaptsmibwHDLNGrj9gbAqOIkCj9MGNSZxt78cyqBWoMrujds+v8ruQVGtEwlaJeVVRMGSjaUot3qQqFPiqoZCBJIc3DG8JyyRO5ZRYBEDBWmG0HT5kt9K46L2ebSpRB5pJhXyU3VI0CmQn6pDmlEFVZi1vnVKAS6fRNWhCCFxTxR4/HPaICTqlDhU78ZLP+yL+Oy0xy/hhYbdtScWJGPSgJSI3j8AyIzB6gkmbBvUzUuoJutV0KtEOL3hnfyfMSQYWGw+WI8qe9ubxgb7/ADq43A5lMsXwIEqJ0SOg1YZ2X1CCHCw1onPthwGANxwSr+I78VCoivmgcUrr7yCvLw8qNVqjB07Fr/++mubx1ssFtx8883IyMiASqXCgAED8PXXX3dTayPn1EFpOGdEcC3sSyv2orgmdrXPo00l8uiTqMH76w9izOM/4ITHV2DM4z9g8YYS9EnUhBV
ccBwHgeNQbY/vqXFCCAGCs9P3NJQaX3egNuKlxhetLcZhixuJOiVunNS/yWxCpNQ7fTBrFcgwa1q8Xq0QkG5Sw+YN7+Q/06zBcVkmMAA/tDNrwXEcFLyAyjjb00KSGQqrHLC4fUjUUV5FpMmM4eWVhZBkhrF9EzG+f3Ksm0Q6KKaBxZIlSzB37lw89NBD2Lx5M0aMGIFp06ahqqqqxeN9Ph9OP/10FBcX45NPPsGePXvw5ptvIisrq5tbHhl/PakvRjTUPn/s652we3rnaLxZp8Bra/bjXysKQxvd2NwBLFixD6+vOQCzNrzNhAxqEbVOL9y++JwaJ4SQIw1KN+KGSf0BAO+vP4jfDkam1Pimg/VY9kc5AOD20wpanE3oKr8kwyfJ6Jusg0psvdpRil4NhcCHvWTpjIaduL/fVdXukhajRkSdyxd2Hkd3OFTvQpnFg1SDOirB3LHum+0V2FNph0YhhP52SM8S08DihRdewLXXXos5c+ZgyJAheO2116DVavHOO++0ePw777yDuro6fPHFFzjppJOQl5eHSZMmYcSIEd3c8shorH2eZlSh0ubFM8v39Lq1gwLPwaxRYNHa4hav/8/aIpi1irDWqGoUAtw+iapDEUJ6jGlD05uUGm9vH4f22Nx+LFixFwAw87iMUBnXSKtxeJFuapqw3RKjRkSSTgVbmMtUx/dLgkElosbhxZaS+jaPVYkCfAGGemd8zFTXOX0oqnbCpFZEfJ8QAtQ6vHi34VzhqvG5tNlgD9Wlvwyfz4c9e/YgEOj4aILP58OmTZswderUPxvD85g6dSrWrVvX4m3++9//Yvz48bj55puRlpaGYcOG4YknnoAktT5S4vV6YbPZmvzEE6NGgftmDIFK5LG11IJFa4ti3aSIEnigzukPzVQczeYOwOoOQAwjsOA4DgqBR5UtPr5kCCHdI9778fZcf0o/DEwzwOmV8MTXuzqdkMwYwyurC1Hv8iM7QYPZE/Ii29AGTm8ASpFHbqIOfDt9M8dxyDCpITEW1sCYUuQxZVAqgPB24tYpBVRYvfCHWdY2Wjx+CYVVdkiMQa+mNf/R8PqPB+D2SxiYZsD0YRmxbg7ppE4FFi6XC9dccw20Wi2GDh2KkpJgVYq///3veOqpp8K6j5qaGkiShLS0tCaXp6WloaKi5c7mwIED+OSTTyBJEr7++ms88MADeP755/HYY4+1+jhPPvkkTCZT6Cc7OzvMZ9l9+ibrcPvUAQCAL7aWYeXulpeC9TQ7yqy47/PtMGrE0MY2RzNqRJg0IgJhztQYVArUu3y0Bwghx5Ce0I+3RSHwuGf6IJi1ChTXuvCvlfs6lTewak8V1u6vhcBzmHv6wDaXKHUWYwwWtw99EjQwhblMNVGnhFGjgC3M5bzTGpZD/VpUh7p2dtg2qIP3G8uN0GSZ4UCNA3VOH42iR8m6A7VYdyD42b55Sj5V2urBOhVY3HPPPdi2bRtWr14NtfrP8nZTp07FkiVLIta4o8myjNTUVLzxxhsYPXo0Lr30Utx333147bXX2myr1WoN/ZSWlkatfV1xcn4yLh7dBwDw71X7sK/SHuMWdV651Y0nv9mFuz/7A78drMe6/bW4upWRtTkT+sLi8oe9BEyjFODxy2Hv+EoI6fl6Sj/eliS9CnefOQgCz+GnfTX4vKHqTbgqbR68tia4u/blJ+YgP1UfjWbC4vLDrFGiT4I27NuIAo8sswYunxRWwJSTqMXgdANkBqzY3XYSt8AH9zGqdrRdRSqaym0eHKpzI1mviso+Icc6ly+A19fsBwCcPzILfaOwczzpPp0KLL744gv8+9//xsknn9wkeWno0KHYv39/WPeRnJwMQRBQWdm0U6msrER6enqLt8nIyMCAAQMgCH+O0gwePBgVFRXw+Voe9VCpVDAajU1+4tUV43IxJjcBfonhiW92xWWZvbY4PAG8/fMB3LR4M9burwXPBUemjGoFbpjUH7edVhCauTBqRNx6Wj6un9S
vwyNRapFHlS2+KoUQQqKnJ/XjbRmaacK1J/cFALy7rhhbSy1h3U6SGV78YS/cfgmD0w248Pg+UWmfX5Lh9geQm6yFWtGx2ZAkvRJapQBnmMU1QkncOyvbLcWrV4motnujth9IW6wuPwqr7NAqxajMEBHgvXUHUev0IcOkxmUn9qzZSNJcpwKL6upqpKamNrvc6XSGXSVBqVRi9OjRWLFiRegyWZaxYsUKjB8/vsXbnHTSSSgsLIQs/7nWcu/evcjIyIBS2fPLvvEch3lnDESWWYMahw9PfbM75utKw+GXZPx322Fc995v+GJrGQIyw/E5ZvzrslG4ZUo+1AoBh+rcmDU2B7/dNxXr7zkV6+85DaNzElBS6+rw7pk6lQiLywc7LYcihHSCVilClmX4YrBz74zhGThtUCpkBjyzfDcqbe2PxH+x9TB2lNmgUQiYe/rAqC0TqXP6kG7SdGqjPa1SRLpRHfZyqJPzk6FVCii3evDHYWs79x0s3FHfzcuhfAEZhdV2+AMMJk3kK28RYHeFLVTh7KbJ+RS89QKdCizGjBmDZcuWhX5vDCbeeuutVoOClsydOxdvvvkm3n33XezatQs33ngjnE4n5syZAwC46qqrcM8994SOv/HGG1FXV4fbbrsNe/fuxbJly/DEE0/g5ptv7szTiEs6lYj7zxoMrVLAznIb3vzpQKyb1CrGGNYdqMUtH2zGmz8Vwe4NICdRi4dnDsUj5wxDbtKf05negIxKmxeFVU5U2rw4/YU1mP2fjfhxX8c3B1QrBHglGdYYrrklhPRc6UY1shO1qHZ4Ir5xXXs4jsNNk/ORn6KH3RPAE9/sgjfQ+ih/UY0D768/CAC4dmJfpJsiv7s2EFyOwvPBZUqdDVxSjSoIPBdWwKZWCKFN/b7b0XYSN8dxUIsCKqxuyN1UOZExhuIaJ6rtlFcRLQFJxr9XFoIBOHVgKkZmm2PdJBIBnSpt8MQTT2D69OnYuXMnAoEAFixYgJ07d2Lt2rVYs2ZN2Pdz6aWXorq6Gg8++CAqKiowcuRIfPvtt6GE7pKSEvD8n7FPdnY2li9fjjvuuAPHHXccsrKycNttt+Gf//xnZ55G3OqToMWdpw/EY8t24pvtFeiXrMeZw1peHhYrhVUOvP3zAWwvC1ZnMWsUmDU2F6cPSWvzS0mSGXiOw4T+yVi66RA+/q0U4/omdrgeuEYUUWnzIMusabdqCSGEHInnOfRL0cPpk1Bt9yLNGJ2T9dYoRR73zBiEO5ZsxYFqJ15ZVYg7pg5o1g/6AjKe/24vAjLDuH6JmDo4rZV77BrGGOqdPvRL0SOhC5u+mTQKJOuUqHX6wpr1mDY0Hd9sr8Da/bWwuv1tzgoY1MHCHTaPH2Zt9FcoVNq8OFjnQpJOSYnEUfL51sM4WOeCQS3irw1LBEnP16kZi5NPPhlbt25FIBDA8OHD8d133yE1NRXr1q3D6NGjO3Rft9xyCw4ePAiv14sNGzZg7NixoetWr16NRYsWNTl+/PjxWL9+PTweD/bv34977723Sc5Fb3Fi30TMGpcLAHj9x/3YWR4f5RVrHF688P0e3PHxVmwvs0Ep8Lh4dB+8fuVonDksPewO+NyRWVCKPAqrHNgS5jrjIxnUIqweP+xxtHESIaTnUCsEFKTqoRA5WGNQDCLVoMY/zxwEngNW7anG/34vb3bMe+uLcbDOBbNGgVumFERtQzar2w+DRoHsxPATtlvCcRwyzBr4JTmsghz9U/TIT9EjIDOs2tN2NUSlyEOSZdQ6op97aPf4UVjtgErgO5xrQsJTZnHjo1+DRRj+dnI/WmrWi3R6H4v+/fvjzTffxK+//oqdO3fi/fffx/DhwyPZtmPeJaP74KT+SQjIDE9+sws1jtjt3+D2SXh//UFc//4mrNpTDQCYPCAFr15xPK4anwetsmOTXyaNAmc2JO99/FvHK7woBB7+AEO9i/a0IIR0jlmrRH6qAS5foNN7S3TFcX3MmHNScKT2rZ8
PYPthKwSeg0rksbvChi+2lgEA/n5qQdROvCSZwemTkJukhUbZ9ZPoRJ0SZo0S9jBzLc4YGpyF+W5HRbsFOXRKBSptnqjmxgQkGQeqnXB6A12avSGtY4xh4epC+CQZI/qYMGVgSqybRCKoU0uhGvetaE1OTk6nGkOa4jgOt502AIct21Bc68ITX+/CUxccB6XYfTt+SjLDD7sqsXjDwVDi3JAMI645uS8GpBm6dN/nj8rC13+UY0eZDTvKrBiaaerQ7bVKARU2L7ITdTRVTQjplEyTGg5PAEU1DqQbNd3el5w7IhOFVQ4cqncj3aRG/1QdrG4/zjouAyaNAr8U1uLEvolRe/xahxepBhXSI7QcTCHwyDCrsavCBjPaPzGfNCAFb/9chNJ6N3ZV2DEko/WKX3qViEqbGxaXD6lRWr52sNaFcqsbaZ1IYCfhWbWnCtsOWaEUeNw0OT9qM3EkNjoVWOTl5bX5QWhrJ2zSMRqlgPtmDMHcj7diX5UDr6wqxO1TozclfqQtJfV455ciFNe6AAAZJjWunpCH8f2SIvL4yXoVThuUiuU7K/Hxb4fwyDkdCyz0KhF1Th9sbj+NLBFCOoXjOPRN1sHh9aPa4UG6UdPtjz/vjAHok6DFO78U4bYlW2BzB2DUiJg9Pg//nD4Qh+rcHa6eFw6PXwLjgNwkLUQhcgNWyXoVNKIAly/Q7my2VinilIIUfL+rEst3VLQZWAg8B4HnUWn3RCWwqLZ7UVznRIJWGdHXg/zJ6vbjrZ+LAACXnZiNTHP3/r2R6OvUX86WLVuwefPm0M+GDRvw2muvYcCAAVi6dGmk2xhXOA4QBA4ef/eVKUw3qfGPhrW4K/dU4b/byqL6eCV1Ljz81Q48+N8dKK51QacScM3JffHK5cdjQv/kiAY1F47uA54DNpfUo7DK0aHbKgQeEpPb3bmVEELaohR5FKQZoFEIMdk/KM2kxn/WFuHllYWwuYN5YzZ3AC+vLMTraw7AHOYO2B1V6/Qiy6xGYoQHZnQqEalGVdi5K2cMCS6H+rmwBo52yogb1CJqHb52j+soly+A/dUO8OA6vLSXhO/tnw/A7gkgL0mL80dmxbo5JAo6FViMGDGiyc+YMWNw7bXX4rnnnsO//vWvSLcxrjSObnEcg60bE/5GZv+5FvedX4qwrRMJz+2xuHxYuLoQf/9wMzYdrIfAczhnRCbeuGIMzhuZBUUURnAyTBqcUhBcX7l0U8dzLXRKBartXgR6wH4fhJD4ZVQrkJ9qgC8gwx3mJm+RIPAczBoFFq0tbvH6/6wtglmriPgSLavbD51KRE6iLioz4GlGDXiOC2svpoHpBuQkauELyFizt7rNY7VKER6/hPoIDihJMsOBaicsLh+SaPY7araWWrBqTzU4ALdMKaBZoV4qou/qwIEDsXHjxkjeZVxKMaiQn2qAs5sT/s4dkYkpA1MgM+Dpb3ejIoyNlcLhC8hYuqkU1723Cd9sr4DMgHH9EvHKX47HtRP7wRjlag0XjQ7uIrtufy1K61wduq1eJcLuDcSkqgshpHdJM6qQl6xFnav7BitEPliVqnGm4mg2dwBWdwBiBAMLSWZweP3ITdRCp4rO6LxZo0CCThFW38xxHKZ1IIlboxBRYfOEVXkqHIfrXThscSPVoKb1/lHi8Ut4ZVUhAOCs4RkYmN61HE0SvzoVWNhstiY/VqsVu3fvxv3334+CgoJItzEu9UnQoG+yDrVOb7ftjs1xHG6eko/8VD3s3gAeX7azS4ENYwxr9lbjhsWb8H/rDsLtl5CfoscT5w/HfTOGICuhe9Y+5ibpML5fEhg6Pmsh8Bxkxmg5FCGkyziOQ26SDhkmDars3nZPcCMhIAd3dTZqWj7BN2pEmDQiAhHcGK7OGdz0LSOK69t5nkOmWQNvQA5rE8IpA1OhEDgcqHG2uyzWoBZhdfkjMqBU7/ThQI0TBpUYlVl5ErRkYykqbB4k6ZS4cnxurJtDoqhTf0VmsxkJCQm
hn8TERAwZMgTr1q3Dq6++Guk2xqXGJVFZZi2qbN23e6tKFHDv9MEwaxQornXhpRX7OvXlt7PchnmfbMNz3+1Btd2LZL0Sd0wdgOcvGYHhWR1Loo6EixtmLdbsrUaFtWMzMXqliCq7N6olCAkhxwaFwKN/qh4GtdgtAxaSzGBx+zFnQssbhM2Z0BcWlz9io/PegASZMeQm6aJ+Ip2oU8KoFsPab8igVmBC/2QAwPKdlW0eG8yvY6jtYgl2j19CYbUDksRgUNM+CtFSVOPEZ1sOAQBumNSfclh6uU69u6tWrWryO8/zSElJQX5+PkTx2PnAiAKPgjQ9vAEJVfbuqyaSYlDh7umDcP8X2/FLYQ2WJutwyZjssG5bbnXj3bXF+GV/LQBAreBx0fF9cO7IrJhuBFSQZsCobDO2lFrw6eZDuHlKfti31alEVNk9sLr9SDGoothKQsixQK8SkZ+mxx+HrXB6A1FbLtTI4vTj+kn9AARzKhqrQs2Z0BfXT+qHQ3XuiD1WrcOH7EQNkvXRzyVQiQIyTGrsqbCHtQ/HtCFpWLO3Gj/urcY1J/Vtc18Ng0pEpd2L7ERtp767ZJmhqMaBWocXGSaqTBQtkszw71X7IDNgfL8kjOuXFOsmkSjrVG85adKkSLejx1IrBAxIN+CPQ1bUOrxI0nfPie3QTBOuO6UfFq7ej/fXH0TfZB1OyEuEwHMQeQ4BmTUZ4XJ4AljyWwn+93s5AjIDzwGnD07DrLG5cVOq9ZIx2dhSasEPuypx2QnZYb+WAs+B5zjUOrwUWBBCIiLVoEa/pAD2VtmhFPmoju57AzIO1bkxa2wObp7SH1Z3ACaNCIvLH9FSs3aPH2olj+xEbbflEiQbVDhY54LbJ7W7Ad+wLBMyTWqUWT34qbAaZwxJb/VYnUpEudUDi8uPdFPHA4sKmweldW4k61Xgu+m1aO37uTf7Zns59lY6oFUKuP6UfrFuDukGYQcW//3vf8O+03POOadTjempjGoFBqQZsP2wBTa3P+rJzo2mD8vAgWonvt1Rgc82H8LpQ9KQ1lDiz6RRwOLyo8buxedby/DRryWwN5TnG5ltxjUn9UVesq5b2hmuYVkmDMkwYme5DZ9vOYy/TQy/E9IpRdQ4vPD4pZjOvBBCeo/sRC3s3gDKLG5kmjRRPRn3BmRU2ryocfgg8hyq7d6InnzKjMHm8WNQuqFbl/0Y1AqkGlQos3jaDSw4jsMZQ9OxaG0xvttR2WZgwXMcFAKHSpsHaUZVh94bq9uPwioHtEoRKjH63xcqkYdZp4BZo2jy/Wxx+aOyP0m8qLZ78X/rDgIAZo/P67aBVxJbYQcW5513XljHcRx3TG6Ql2JQoSDNgJ1lNigEvt0ONFKuO6UfZMbw5AXDsWhtMd5dVxyaRr96Qh6untAXK3dXwe4NIDtRi7+elIfROQlxW/nikjHZePirHfh2RwUuHpMd1vQ5AGhVAipsftjcfgosCCERIQo8ClINcPkk1Dh83TIjKkVpNLve6UOiThnVhO3WpBrVOGzxwC/J7c78nDooFe+tP4g9lXYU1zjbHAAzqhWoc3ph9wZgDDNY8gVk7K9ywBuQumX5skrk0SdRg9fW7MeitcUtLnPrjcEFYwyv/7gfbr+EQekGnDms9SCRRF93FRkCOpC8LctyWD/HYlDRKMusQb9kHepc3VcpSiHweHjmUCxaW9xsc6V/rSjEf34pwn0zBuGmyf3x8mWjMCY3MW6DCgA4PseM/BQ9vAG5QxsB8hwHgQuO8hFCSKRolAIKUvXgOAZHGEnI8cgvyfDLMnKTdN0yQn+0BK0SCVpFWHs/JWiVGNs3EQCwfGdFm8eqFQK8ATnsPS0YYzhY60SV3YsUfeR37m6JWafAa2v2418rmn4/L1ixL6qbH8baugO12FBUB4HncMuU/G5bbkb+xFiwzyqzuGHzBnNQVWL0K59RbbUI4jgOeck69EkIVorqjjWUAs8h2aDEu+uKW7z+3XXFOKk
gGWcflxnxDZaigeM4XDwmWCFq2e9lcHZgd1WDWkSt09etm1sRQnq/JL0K/VP0sHp8PbL6XI3DizSjGikxWooiNJSe9QSksKoYTmtYArVqTxW8gbb7c60ymGsRzr4jVXYvDta6kKhVdsv3Yaw2P4w1pzeA19ccAABceHwf5CbF17Lr3s4vyah1eFFudUNiMvql6DA6JwHH9TF1y4qOTpe6cDqdWLNmDUpKSuDzNR0tuPXWW7vcsJ5KFHjkp+rh8UuodniQFuUNd8LZXMnWsLlST0kWG9cvCdmJWpTWufD1H+W4OMyKVxqFAIvbB4vbB42SqnwQQiKnT4IWDm8AJXUuZJg0PWYE1ukNQCnyyE3SgY/hCWyiTgm9WgG7J9BuHuLIHDNSDSpU2b1Yu78WUwamtnqsQSWixumFxe1HchuBk8MbQGGVo1uXKndk88Oe8v0cjv9bfxB1Lh8yTWpcGub3N+k6ly8Am8cPDhzMWgUKzAYk6ZTdvjy8U4HFli1bMGPGDLhcLjidTiQmJqKmpgZarRapqanHdGABNK0UVef0RTVh6cjNlVrqvBo3V+pJS4R4jsPFo/vghe/34sttZZg5IjOsPwyO4yDyPKpsVD6QEBJZPM+hX4oeTp+EGocXqYbuWUrTFTJjsLh9KEg1hJ2vFi1qhYAMoxr7quztBhY8x+H0IWlYvKEEy3dUtBlYiA05GzV2b6uBRUAK5lU4PAFkmLrvffNLMgzqtr+fDWqxw3s3xbNd5TZ880c5AODmKflQdsPSm2OZJAeLMrh8AWgUArISNEgzqGHuplm5lnTqHb/jjjswc+ZM1NfXQ6PRYP369Th48CBGjx6N5557LtJt7JEaK0UBCGtdaWd19+ZK3eWUgpRQhavv2llneySDSgGLy9ehJVSEEBIOtSKYb9E4Eh3vLC4/zBolshLiY6AlxRhc4+3xt79cdergNPAcsKPMhkP1rjaP1asUqG6oCtiS0joXKmxupBo6Vj2qK/ySjBe+34uf9lVj9vi8Fo+ZPT4PP+6txoNfbu8Rn6f2+CUZ/15VCAbgtEGpOK6POdZN6rU8fgmVNg+q7B4oRR5DMowYk5eIIRkmJOlVMV1e16nAYuvWrbjzzjvB8zwEQYDX60V2djaeeeYZ3HvvvZFuY4+VYlAhP00Plz8Q1XX/jZsr3XZaAYya4CSUUSPittMKcP2kfrC4el6HJfAcLjw+mGvx2ebDYSfDa5QCPAEZll7QSRNC4o9Zq0R+mgEuXyCsE+RY8UsyPP4AcpM7t4FcNBhUIpL0qrBOopP1KozOTQAAfNfOTtw6pQCnN9DiTuk1Di+Kap0wqZWh2Y1oc/kCmP+/nVixuwrPfLsH153S8vfzdaf0w4s/7MWWUgtuX7IFu8tt3dK+aPlsy2GU1LlgVIv460ktD3aSzpMZg9Xtx2GLGw6vH6lGFUbmmDEmNwE5Sbqob+QZrk61QqFQgOeDf6CpqakoKSnB4MGDYTKZUFpaGtEG9nRZZg08fgmFVQ6kCuqobLLUXZsrdbepg9Pw0cZS1Dp9WLm7CtOGhleuTinwqLJ5kGmKbn4LIeTYlGFUw+72o7jWiXSjJi6Tb+ucPqSbNHG1ZIvjOGSYNKiwBoubtPe6TRuajo3F9Vi5uwpXjstt9fuT4zioRAGVNg8yjuj33b7gdy8Y120nXTUOLx75ageKa11QK3j85cQclFs8LX4/l1s8uHlyPp78ZjcOW9y4+/M/8NeT8jDzuMwe9911uN6NJRtLAADXTuzXbft5HQt8ARlWtx8+SYZBLWJAmh7JBhUMKjEuPyedOssdNWoUNm7cCCC4C/eDDz6IxYsX4/bbb8ewYcMi2sCejuM45CVFv1JU4+ZKhVVO1Dt9KKxyotLm7bFBBRAspXv+qCwAwCebDoX92ulVIqwuPxy0HIoQEgU8z6Fvig4pBhWqHfG3Pt7lC0DggZxEbdwFPQlaBcxaRVizFmNyE5GoU8Lq9mNDUV2bxxrUIupcPtg
aSgJLMsP+agcsLj+S9cqItL09RTUOzFu6DcW1LiRoFXjy/ONwQl5im9/PuUk6vHDJCJycnwxJZnjzpyI8/e1uuHw95/uLMYaFqwvhlxhGZZsxaUBKrJvU4zHG4PAGUGZ1o97tQ4JOgZHZZozJS0C/FD2MakVcBhVAJwOLJ554AhkZGQCAxx9/HAkJCbjxxhtRXV2NN954I6IN7A0aK0UlG1SosnvCKrfXWZLM4A3IPS6nojVnDk2HUS2iwubBT/uqw7qNWiHAE5B65BIwQkjPoBIF5KcaoFYIqHeFt49Cd2CMod7lQ58ELRJ03XNC3RGiwCPTrIHbH2j3u1DgOUwdnAYAWL6j7Vw7lSjAF5BR5wgWKimzuHCo3tVteRWbS+rxz0//QK3Th+wEDZ69aATyU/VNjmnt+1mrFPGPaQNx3cR+EHkOv+yvxdyPt6G4xhn1dkfCit1V+P2wFUqRx02T8+P2hLcnCEgy6pw+lFndCMgy+iXrMCY3ASOzzUg3qWOyD01HdSqwGDNmDKZMmQIguBTq22+/hc1mw6ZNmzBixIiINrC3aKwUpVMF91og4VErBJwzMjhr8fGmQ5DDDMrUDdPici8JsAgh8cekUaAg1QBvQI6b/XOsbj8MagX6JGhj3ZRWJetV0KlEOL3tv2anDwkGFltLLaiwtT07ZFApUGH1otruxf5qJ4xqRVSWHx/th52VmP+/nXD7JQzLNOKZC0cgzdixJWgcx2HmiEw8ecFwJOtVOGxx485PtmHl7rbzS2LN4vLhnZ+LAACzTsxBejdW3epNXL4AKm0e1Dh90KoEDO9jxgl5iShIM8CsVfaoYK1Tf3GPPfYYioqKIt2WXs+oVmBgugEcolspqrc5a3gGtEoBpXUubDhQG9ZtDGoFrB4/7LQcihASRWlGFfomaVHn8oa1SVs0BSQZTp+EvGRdt+3V0BlqhYB0oxo2b/uDbOlGNUZmmwEA37eTxK1XibB5/SiqcUCSGQzq6K7zZ4zhgw0HsWDlPkgyw6QBKZh/7jDo1Z3P5xiUbsRLl47EqGwzfAEZL/6wD/9euS9uN2Z8++ci2L0B9E3W4ZwRmbFuTo8iyQwWlw+HLS54/BIyzWqMyjHj+JwEZJk1cVN0oaM6FVgsXboU+fn5mDBhAhYuXIiamppIt6vXStZ3T6Wo3kSvEnHW8ODSu49/OxTWUjKlyMMfCP7REkJItHAch9xkHTJMGlTZvVFd6tqeWqcPaUYV0gyx2WG7I1INwWIm7e2sDSBUuOOHXZVtLvMVeA4ix6HO6Wtzs7xI8EsyXlqxDx9uDBasuXh0H8w9fUBEZkhMGgUemjkUl5+YAw7A8p2VuOvTbXG338Xmg/VYvbcaHIBbpuR3W9Wtns7jl1Bl86DS5oZC5DE4w4jReYkYkmlCcoxLxUZCpz4F27Ztw++//47JkyfjueeeQ2ZmJs466yx88MEHcLnarjdNgpWi+ibrUOfyhl1G9Vh37sgsqEQehdUObCm1hHUbrSK4HKq35JsQQuKTQuDRP1UPvVpsseRpd/D4JYALJmz3hBM8o0ZEkk4Faxi5cGP7JsKkUaDO6cNvB9tO4k4xqJAZ5Z3Rnd5gOdmVu6vAc8DNk/Nx1fi8iD6mwHP4y4k5eOScoTCqRRyoduL2JVuwoSi8Wfto8/glLFxTCACYOSIztG8XaZnMGGxuPw5bXLB7/Ug2qDAqJwGjcxOQm6SDPk5KxUZCp3ufoUOH4oknnsCBAwewatUq5OXl4fbbb0d6englQY9lTSpF2enENxwmjSI0avXxb+GVNNarg7ud2j207IwQEl16lYiCND0CjHX7Bp2MMdQ6fehj1iAxDhO2WxIsPauGxFi734EKgcepg4K7b7eXxM1xXFTXo9c4vLj7s9+xtdQCtYLHA2cNwZnDonfeMyonAS9dOgqD0g1w+iQ8tmwXFq0tivl5w0cbS1BpC+52PmtsTkzbEs/8kowahxcVNjcYBxSkGjA6NxHH9TEh1Ri
dLQhiLSLPSKfTQaPRQKlUwu+nk7hwNFaKSumGSlG9xQWjsiDyHHaU2bCjzNru8QqBh8Rk1FOyPCGkG6Qa1OifrIPV4+vW2WibJwC9SkB2orZHJXkm6JQwahSwhTH4c0ZDEvemg/Woaaj81N2Kahy486hysmPyEqP+uCkGFZ44f3goh+HTzYdx3xd/xGx27EC1A59vOQwAuHFSP2iVvWe0PRKOLBVb5/TBqBFxXB8zTshLQP9UPUya+C0VGwmdDiyKiorw+OOPY+jQoRgzZgy2bNmCRx55BBUVbY8mkD+pFQIGpBmgp0pRYUnSq3Baw6jVx78dCus2WoWISnvskyoJIceG7ERtQ75F9wwYSTKD0+dHbpI2bnbeDZdC4JFl1sDtk9p9rfokaDE00wiZBXMtultjOdk6pw/ZiVo810I52WhSCDyundgPd585CBqFgB1lNty2ZAv+OGTptjYAwc/by6sKITPgpP5JOLFvUrc+fjwLSMGBzDKrB35JRl6SFqNzEzAqOwEZJk2PKBUbCZ0KLMaNG4f8/Hx88sknmDNnDg4ePIgVK1bgmmuugclkinQbezWDWoEBVCkqbBeO7gOeC3byhVWOdo/Xq0TY3YHQpkmEEBJNjbPRZq0SNY7oDxjVOr1I0qmQbtJE/bGiIUmvhEYpwBVGMZPG5bDf76wMu/R4JHy/swKPfLUDbr+E4VkmPHPBcUjtYDnZSDkpPxkvXjISeUlaWFx+3P/ldizdVNptr8eyP8pQWOWATingulP6d8tjxrPG2YkKmxs1Ti/USh7Dsow4IS8RA9ONSNApwffwZOyO6lRgcdppp+GPP/7Ali1bcOeddyIzs2slxl555RXk5eVBrVZj7Nix+PXXX8O63UcffQSO43Deeed16fFjjSpFhS/DpMEpBcFdPcPJtRAFHjIY6pyxmTonhBx7tEoRBal6cByDI4qDGt6ABMaA3CRdj12rrVWKSDOqYA1jOdSE/knQqQRU2b3YWmKJetsYY1i84SD+tTI4Qj9pQAoeOWdol8rJRkJWwwZ8pw5MhcyA/1t3EI8t2xnVzxoAVNk9eG/9QQDA7Al5PSafJxp8ARm1Di/KrW74JRnZCVocn5OA0bmJ6JOgjetyz9HWqZ7o8ccfx7p16zBs2DCo1Wqo1WoMGzYMb731Vofva8mSJZg7dy4eeughbN68GSNGjMC0adNQVVXV5u2Ki4sxb948TJw4sTNPIe40Voqqd1OlqPZcNLoPAGDdgVqU1LVfhUyvFFFlo9eVENJ9kvQq9E/Rw+rxRW0PglqnF5lmNZL1PfsEL9WghsBz7b5OKlHAlIENSdw7o7vs2i/JeOmHffiooZzsJWOycWeEyslGgloh4PapBbhlSj4UAoeNxfW4bckW7Ku0R+XxGGN4bc1+ePwyBmcYQ7NHxxJJZrC6/SizumDx+GDUiqGN7AZlGJHUC0rFRkKn/kIefPBB3HbbbZg5cyaWLl2KpUuXYubMmbjjjjvw4IMPdui+XnjhBVx77bWYM2cOhgwZgtdeew1arRbvvPNOq7eRJAmzZs3CI488gn79+rV5/16vFzabrclPPGqsFJVlpkpR7clN0mF8v+C6zqWb2p+10KlEOLwBWMIoa0gIiT89pR8/WlaCFjmJWlTbPRFfqmL3+KFRiD0uYbslZq0CyTolrGEsB542JHhCu6GoDvVR2qfI6Q3gka92YOWeYDnZW6bk48pxuXH3OnMch2lD0/HsRSOQblSjyu7FPz79Hd9sL494fs/a/bXYWFwPkedwy5T8qJbzjTdun4TKhn0neA7ITzFgTE4iRmUnINOsOaZnJ1rSqcDi1VdfxZtvvoknn3wS55xzDs455xw8+eSTeOONN7Bw4cKw78fn82HTpk2YOnXqnw3ieUydOhXr1q1r9Xbz589HamoqrrnmmnYf48knn4TJZAr9ZGdnh92+7kaVosJ3yZjg+/jj3up2Nw0SeA4cB1oORUgP1ZP68SMJPId+KXokGVS
otkeu/5EZg83jR06iJuq7S3cHjuOQYdbAL8vtDqrlJeswMM0ASWZYubvtlQ2dUW334p+f/o5th6zBcrJnD4n70fn+KXq8eOlIjO2biIDMsHD1frzww97g3iYR4PAG8PqP+wEEVwzkJGojcr/xrDER+7DFBZc/gHSTGqNyEjAmLzFY2Unbuys7dUWnAgu/348xY8Y0u3z06NEIBMJf41dTUwNJkpCWltbk8rS0tFarS/388894++238eabb4b1GPfccw+sVmvop7Q0vD0QYoUqRYUnP1WP43PMkBnw6eb2K0TplQpUO3xh7fJKCIkvPa0fP5JaISA/RQ+FwIU1Ih+OeqcPiTolMs295wQvQauESa2AI4w9QM4YGjxn+G5HRUQH4A5UOzDvk204WHdEOdnc6JeTjQS9SsR9MwZjzoQ88Bywek817ly6DaX1Xd+0+N21xah3+ZFl1uDi0T0jqO8MxoI5UeVWN2ocPqiVPIZmmjAmLxHDsoL7TijF+FgKF8869QpdeeWVePXVV5td/sYbb2DWrFldblRr7HY7rrzySrz55ptITk4O6zYqlQpGo7HJT7w7slJUpL6IeqPGWYsfdlWitp265lqVAKfHH9Yur4SQ+NIT+/EjJeiUyE8zwOULdHkU2ReQ4Zdl5CXpetVJjlLkkWFWw+Ftv4+emJ8CjUJAmdWD7Yfb39MoHJsP1uPuz2JXTjYSOI7DBcf3wePnDUeCVoGSOhfu/HgbftpX3en73FFmxbcNmxLePCW/V33mGnkDEmocXpTb3Aiw4N/W6LxgInZ2orZX7YrdHTr9ar399tv47rvvMG7cOADAhg0bUFJSgquuugpz584NHffCCy+0eh/JyckQBAGVlU1rUldWVra4g/f+/ftRXFyMmTNnhi6T5WCylyiK2LNnD/r37x3lz5L1KhSkG7CzzAqlwNMavhYMzTRhaKYRO8ps+HzLYfxtYuv5NjzHged51Di8MSsTSAg5dmUY1bC7/SiudSLdqOl0kmeN04sMkxrJelWEWxh7SXoVNAoBLl+gzU3XNEoBpwxIwfIdFVi+sxLD+5i79Ljf7azAKw17MxyXZcI9Mwb36JPJYVkmLLh0FJ79bg/+OGzFM8v3YGe5DX89qW+Hks/9koxXVhUCAE4fkobhWb1nOwFJDpaJdXoDUIgcErRKpJsMSNAqoVbQ+VZXdOovZ/v27Tj++OMBBE/2gWCQkJycjO3bt4eOa2/9mVKpxOjRo7FixYpQyVhZlrFixQrccsstzY4fNGgQ/vjjjyaX3X///bDb7ViwYEGPWXcbrkyTGm5fAPurHRCF3rn1e1ddMjobD5XtwLc7KnDxmGyYNK2vNzaoRNQ4fPD4Jeo4CCHdiuc59E3RwekLoNrhQbqx4/tOOLwBqEQeuUm6XlkbX68SkWJU4XC9u93dnKcNScPyHRVYu78Gdk+/TuWaMMaw+NcSLGmo/DR5YApuPbWgV3zXJuiUePTcYVi84SCWbjqE//1ejn2VDvzjzIFINYQ3uPbp5kMorXfDrFFgzoS86Da4m7h8wX2tGGMwqBQYkKZHol4Fo1qknIkI6VRgsWrVqog1YO7cuZg9ezbGjBmDE088ES+99BKcTifmzJkDALjqqquQlZWFJ598MlTW9khmsxkAml3eGzRWivL4ZRyqd3VplKu3GpVjRn6KHoXVDvx3WxmuHJfb6rFapQCL1Q+Ly490EwUWhJDupRIF5Kca8PshC+pdPiRowy8TKzMGq9uHglRDmwMoPV26UYOy+uDOxW2d4Oen6tEvWYcDNU6s2lOFc0Zkdehx/JKMl1fuw6o9wWVCl4zJxhVjc3rVyaXAc7hqfB4GZxjxwvd7safSjts/2oo7zxiI0bkJbd72UL0rFHBdO7FzgVu88Esy7J5Aw6Aij0yTGilGFRK0yl4RRMabmL+il156KZ577jk8+OCDGDlyJLZu3Ypvv/02lNBdUlKC8vLyGLcydnpTpSiPX0Kd0xfRvBGO43D
xmOC+Fst+L4OzjcQ/juOg4DlU2duuIhWvXL4ADlvcqLR5UO/0wekN0N4chPQwJo0C+al6eANyhzZEtbj8MGuVyEromTtsh8usUSBBp2j3e4LjOJzRUK1p+Y7KDn03Or0BPPzVDqzaUx3X5WQj5YS8RLx06Ujkp+phbyil+/6Gg61W4JIZw79XFSIgMxyfk4CJBeHltMYTuSERu8zqRp3TB61KwNAsI8bkJWJolgmpBloFEi0c68lnqp1gs9lgMplgtVp7VAKg3ePH9sNWePxyj1lbyxiD2y/B4Q0gIDOoBB56tQirxw+dQoQuQmtYZcZwy4dbUFrnwpXjckNJ3S1pTJ48oW9iu1Pt8cIbkFBh9aC0zgWHVwLPMXDgoRA4iCIPlchDpxSgU4lQijyUAh/8t+H/vfXLkvQcke53e2o/3ogxhsIqB/ZXO8KaifZLMmocXhzXx4x0U+/PESu3urGt1IoMk7rN/RKc3gCu+s+v8AVkPHvhcRiU0f5nodruxSNf7cDBOhc0CgH/PHNQu6P3vYVfkvHmTwfwzfZgMvaIPibMO2MgzA0zZwLPQeQ5/LCrEs8s3wOVyOPflx+P9B6Ul+jxS7B7AvBJMvQqAWlGNZL0Kpg0Clrx0QUd6XN7xpkVgUGtQEGaAdsPW2F1++N2KlySGZzeANx+CZIsQ6MUkWJQIVmvglGjgE4poLjGib1VdihFPiIjBjzH4eLRffDC93vx5dbDOGdEZqs5FBqFAIvLB4vLH/eBhV+SUWX3orTWBYvbD4NKRKZJDY7jIDMGvyTDLzG4fRJsbj8CDaNPHABR4KAQgq+vVilArxKhUgihoEPVEHT0xnXahMQ7juOQl6yD0xtApc2DjIa/69bUOX1IM6qRYugZg0pdlahTwqAWYfcE2vyu06lEnJyfjJW7q7B8Z0W7gcWBagce+d9O1Dl9SNQq8eDMIeif0rMqP3WFQuBx0+R8DMkw4t+rCrHtkBW3L9mKR88bhjF5CTBrFLC4/Lj6pDzkp+pRWGXvEUGFJDPYPX44fRJUIockvQppRjXMWgXlU8ZAfJ9ZkSaS9SoUpAUrRSkELm5OjH0BGU5fAG5/ADx46FQCshM1SNApYVQ3/8POTtTC7g2gzOJGpkkTkRH1UwpS8MGGElTYPFi+owLnjmx5vS3HcVAIAqrt3na/zGNFkhlqHF6U1LlQ6/BBqxCajdzxHAeVKKClSR/GGALyn4FHrcOHCqsHDAxggCAEZzuUvACNUoBOJUCtEILBxhEzHSJNExMSNQqBR36aAU5fcIloUisz0S5fAAIP5CZpj5kRV5UoINOkxp4Ke7uDaNOGpmPl7ir8tK8G107s1+r34uaD9Xjq291w+yXkJGrx0MwhYScx9zaTB6aiX4oeT36zCypRwNi+iXh3bTEWrSuGzR2AUSNi9vg83Di5Pw7VueENxN+S28YVEXZPADJjMGoUGJSoQaJeBYOKErFjKT7OTEnYjqwU1Tgi3d0YY/D4g8GEV5KgEHgYVCKyEwwwaYMjTW21qzFvxOWVUOv0RWRpl8BzuPD4PnhldSE+23IYM4ZntNoGg1oM5ij4pLgqKcgYQ63Th0N1LlTZvVAIPNKN6g6fTASDJ67V5x9oCDj8kgyLy4dqB4MsM4ADBI4LzXaoRB46lQidUvwz4DhiqRUhpGv0KhEFqXr8ftjaYolVxhjqXT70T9GHlqscK5INKhysdcHtk9ostz443YDsBA1K691Ys7ca04dlNDumt5WTjYScRC1euHgkOA5YtLYYL68sDF1ncwfw8spC8ByHWWNzUGmL3K7xXeWXZNjcfnglOZiIbVYjxaBGglZBg2Fx4tj+y+qBYlUpSmYMLp8EpzcAiclQiwLMWgVSDHoY1AoYVGKHltVolSL6p+rxx2ELnN5ARPItThucio82lqDW6cPK3VWYNrT5XihAcCfcOqcPFpcvbr5crC4/SutdqLAGE8uT9aqoBY2iwEMUAA2af1lLDTMdAYnB5ZV
gdQWXWDUO/jQGLEoxuMRKp2xYYtUQdGgUAiXEEdIBqUY1+nkD2FdlbzZYZHH7YVArkJ3Ye3bYDpdBrUCKQYVyq6fNwKIxifvtn4uwfEcFzj4uEyLPISAzBCQZizeUYMlvwepGUwam4O+9pJxsJOjVIvJTdbjm/za2eP1/1hbh5in9UePwtZro3Z3qnT74JBlmrQL5JjUSdcq4WblB/kTvSA/UOOLvDUiotHuQYYzOkp6AJMPpk+DyBSstaZUCMhPUSNSqYNSIXf6DTjGo0C9Zh90VjojkWygEHuePysJbPxfhk02HMHVwWqtBl0rkUWnzIMscmaVYneXwBnCozoVyqxt+iSFRp4RKjN2aUIHnIPAC0MLqA5kxBBpmOrx+GQ6PhIDsQePXjShwMGsUGJxhpM6ekA7ISdTCcdTy0IAkw+2XMCzLdMyuE08zqVFm9SAgyW2ORk8ZmIqf91Vj3rSB6J+ig83jh1GjwO+lVvx2sB4AcOmYbMzqZeVku0rkOVjdftjcLVdTtLkDsLoDEHku5oGF0xuAT5YxNMuINIOa8gPjGH3791BqhYABaQb4AlbUOHwRS+rz+CW4fBLcfgkiz0GvFtEvRQdzwxKnSJ/0ZifqYPdIOGxxRSTfYtrQdHz8WykqbB78tK8akwemtnicXi0GO9R2kgOjxeOXcLjejUOW4FR/gjb+R154joNS5FpcBtWY11Ht8GBXuQ1DMky0WzwhYQotD/VJof681ulDqkGFtGMkYbslCVolzNpg6dnWclAAINWgwpLrx+Ptn4tw59JtTfIEPr5+HP67rQxjchO7seU9Q0BmMGkUMGrEFoMLo0aESSOi2h7bpVB+SYbF7cPANAMyTL273HJvQPOBPZhBrcCANAN4Hp3eG4IxBpcvgCq7B2VWNxxeP3QqAUMzjRiTl4AT8hKRn2pAsl4VlZF0gefQPzUYuNQ4fF2+P7VCwDkNidsfbzoEuZVqyipRgC8gw+rq+mN2hC8go6TWid8O1mNftR1KXkCWWRv3QUV7gnkdPNIMGlTbvdhVbutQjX5CjnVapYj8VD04jqHW4QXHA7lJumN63bjAc8g0a+AJSG3uU2HWKfDOL0V4eWVh6AS5MU/g3XXFOGt487wLElz6anH7MWdC3xavnzOhLywuf0xnK2TGUGX3ok+CFjlJupi1g4Tv2O2xeomkhkpRbv+fS5ba01iardLmQbnNA28guDfG8CwTTuibhNG5CchO1MKsVXZL/saRX6iONja4C9dZwzOgVQoorXNhw4Ha1h9XIaLS7g0mLkdZQJJRbnVjc0k9dpXbAAZkGjXQq3t2QHE0geeQbtSg0u7B7gobPH4KLggJV7JehX4pevgkGVkmDRK08VlWvDsl6ZTQqxWtfjcIfHAJ5qK1xS1ev2htMcxa2sOgNRanH9dP6ofbTiuAURP8PjJqRNx2WgGun9QPFlfkNrTtjBq7F4k6Bfqn6Ok97CF611nNMSrTpIbHJ6GwunnyXyO/JMPhDW4Ox3EctMqGkrBaJYya2Nd6bvxC3VVuh6qL+RZ6lYizhmdg6aZD+Pj/2bvz+DirevHjn2eZfc2+tGmbriyFtrRQymJBQBTFi4qiomCvO+hFqvcqiiAqi7soSC+ooPxAEK8oiqJYNqFl697SfUvS7Mvs+8zz+2PalNAkzTLJzCTfN6+8SmbOzJyZJOd5vs855/t9vYkzZ5b1u8TKadXpjiQIxJJjlnElkzHoDMdp7I7QGUpg1TWqPbZBiz4VO01VqHHbaA3EUAhwQo07779fQhSLqSV2VEWh3GmR/QBkZ6Fr3FZ2twdxWY8NtIppn0AhiqcyNHVHuXLpNK49fxb+aAqPTccXSeY91aw/mkTXFOZUumRpbRGRwGICOFJsKZ5K09CdzRSlKhBLZrKVpvukhLUNKSVsPkwtsROMpWjqifYWghup/1g4hSc2NbOnI8SGBh+n9VNZ1aSppA6nXM11YJFNE5mksTtCezCGpqhUuYafOrZ
YZWcurLT6oyiKwrxqlwQXQgyBpiqTMgvUYMpdFhq6w8SS6WPGkWLZJ1DI4qkMbYE4naEEuqrQEYznPQiLHV6FcVKtmxLH5Eq1XOwK68xSjJimKsyscFLhsnDIF6ElECWaTOGxm5hf62HJ9FKWTC+lvsJJqcNccEEFHN5vUeHEazeNer+Fx2bqTTf7+3WNA7ZzmHVaA7kdRP3RJNtbAqxv6KEjGKfMYaHCZZk0QcURmqpQ7bHR4o+yszVIPCXLooQQw+e26pQ5Lf3uJSyGfQLFIp0xiKcyef+s0odn+qeXOaiVzdpFR2YsJhCrSWNetRu3xYTr8BUcm0krqul0m1nLFoxq8hGKpUa1B+H9i6bwty0tbGsOsK3Zz8m1nmPaOC06XeEE/miS0lFeFQkfThd5qCdKPJ2h1G6e9FfpNVWhymWlxR9FUWBetSuv6XSFEMVHURSqPVZa/THSGeOYizRH9glAtvbCkaxQK86q57PLZ9LUHc1Ht8UItQdjVLut1Jc7JK1sEZLAYoJxWnTmVLvy3Y1RKTu832JHaxCLaeT7LcqcFi44sYp/bGvl9683cst7jw0sdE0lnTHoDsdHHFjEkmla/VEauqNEEmlK7CbKzJM3ReRb6Vp2GVizL3twP6HaLZW7hRDDUno49WwgmjxmaUwh7xMQw9MdTmC3ZBO6yHGiOMlPTRSkqSV2pnhttAfjA6aMHYrLT5uKqsD6Bh972kP9tnFYdNoDcZLp4R18Eqls9fP1B3vY0RpEVxWmeG1Fnzp2LOiaSqXLyiFflF1twWF/1kKIyU3XVGq9NqIDpJ49sk9gT3uYnnCCPe1h2gJxCSqKSCSRIpnJMLfS2e9GfVEcJLAQBenIfosSu4nO0Mg33VV7rLxtbgUAv3+9/70WTotOOJEaci2QdMag1R9jY2MP25r9pDMGtR6bDITHYTo8c9HUE2W3BBdCiGEqc1iwmzXC8YH3axXKPgExPEeK4M0sd1Dptua7O2IUJLAQBctm1phT5UJTFIKxkefS/uDiOgDW7uuioTtyzP2aqmAY0H2cDeOZjEFHMM6mRh+bGnuIxNNUuWx47eai2seSTyZNpdJloaE7IsGFEGJYbGaNareVwCiOB6LwHCmCV+uxM00yohU9CSxEQSt1mJlZ4SAQS5IY4ZT2tFI7y2aWAfDYABminBad9lC838xFhmHQE06wtdnPxkYfPZEElS4rZc7Jl+kpF0yaSoXTSkN3hD3tQVISXAghhqjSZUXXFckyN4F0BuN47SZmVzondaX5iUJ+gqLgTS3JXsXoCMZGvN/iQ0uysxYv7Oqg1R875n6HRSccSx6zHCoQS7KjNcCGxh7aA3FK7ebsgU0Gv1Ex69ng4mBXhD3tIQkuhBBD4rbplDss+PNcEVrkRiCaRNMU5lZJEbyJQs6ORMFTD9foKHWa6RxhkaPZlU5Om+YlY8Af1jcd+xqKgqqqvfs5IokUe9qDbDjYQ0N3BJfFRJXbKlkqcsisq5Q7LRzoCrOnPSRrooUQx6UoCjUeK6mM7KModrFkmnAixexK56jTvYvCIWdJoihYTRqzK11o2sj3WxyZtVi9vY2ufjaEOy06XaEE+zpCrD/Yw572EBZdo9Zjn/T1KMaKRdd6g4u9ElwIIYagxGHGYzfLXosils4YdIXjTCu1SxG8CUYCC1E0Sh1mZlU4CY5wv8XJtR5OrnWTyhj8ccOhY+53mDWCsRS72kIoKNR6bDgskjp2rFl0jTKHhX2dYfZ1hMhIcCGEGIRJU5nitRFJpPLdFTFC7cEYlS4r9RVSBG+ikcBCFJUpXht1pXY6QiPbb/GhwxmintrWesx+CkXJBhM1Hitum0kyPY0jq0mjzGGW4EIIMSRlTjM2s0Y4LsFFsektglflxKLLaoCJRgILUVSO7Lcoc1roGMF+i0XTvMyucJJIZXhiU/Mx92uqgioBRV5YTRolNjN7OkLs65TgQggxMLtZp8plxS/LoYpKJJEimc4wp9KJW2o
/TUgSWIiiYzVpzK5wYtIUAkMsaneEoih8aMlUAJ7c3CxXuwqMzaxRarewtz3E/s6wBBdCiAFVua1oqjLiVORifB0pgldf7qDSZcl3d8QYkcBCFKUSh5lZlU7CidSw85kvnVnGtFI74USaJ7e0jFEPxUjZzBpeu5m9HSEOdoUxRphiWAgxsXlsJsoc5mOWtYrCYxgG7cEYNR4b08vsstR4ApPAQhStWo+NulIbnaH4sPZbqIrCBxdnZy3+vPEQsaQUWio0drOOx2Zid7sEF0KI/qmqQo3HRlJSzxa8jlAcr93MnEqX1IGa4OSnK4qWqirUlzspH8F+i3PnVFDtthKIpfjHttYx6qEYDbtZx23NBhcN3REJLoQQxyh1mHFbTIRkWWvBCkSTaKoUwZssJLAQRS1b38KJSVeGNR2uqQofOC07a/HHDYdISuXnguSw6DgtJna1hWiU4EII8RZmXaXGayEUl+VQhSieyhbBm1UhRfAmi4IILO6++25mzJiB1Wpl6dKlvPrqqwO2ve+++zj33HMpKSmhpKSECy+8cND2YuLz2rP1LSLD3G9xwYmVlDnMdIcTrN7ePoY9zA1NVbDoKtoky/nttOg4zTo720I09UhwIYToq9xlxWbSCqquRTpjEEumCUSTdIXitPpjNPujNPujtAVik2LpVjpj0BnKFsGb4pUieJNF3gOLRx99lJUrV3LzzTezfv16FixYwMUXX0x7e/8nes899xwf+chHePbZZ1m7di11dXW84x3v4NChYwueicmjd79FOD7kAdukqbxv0RQA/m99U8EO9BZdpcpjYXalgxKHidmVDqrcFix63v98x43Teji4aA3R1BPNd3eEEAXEadGpcFvGvRJ3xsgGD6FYiu5wgrZAjGZfNnjoDMWJJFKoioLXbqa+ws7JtW4W1nkpc5ppDUQJxQonEBoLHaEYFS6LFMGbZBQjz5f/li5dyumnn85dd90FQCaToa6uji9+8Yt87WtfO+7j0+k0JSUl3HXXXVx11VXHbR8IBPB4PPj9ftxu96j7LwpHPJVmS5MfXyRJlds6pMfEkmk++ZvXCMRSfOOSE1k+t4JUxiiYIMOiq0wttbHq+b08sOYAgWgKt01nxVn1fHb5TJq6o8QnUarFYCxJJJHmhBoXU0vs+e6OGKJcj7syjou36grF2dDgo9RhxpTDzcGGYZBMGyTTGRLpDMlUhlTGQFEMFFRMuoJJU7GZVRxmHbtZx6KrWHQNs65i0dVjTqqT6QzNvigHOsMk0hnKHZYJt6G5J5xAVeHUOq/Uq5gAhjPm6uPUp34lEgnWrVvHDTfc0HubqqpceOGFrF27dkjPEYlESCaTlJaW9nt/PB4nHj+6sTcQCIyu06JgWXSNOZUuNjb14I8m8diOP5hZTRqfOmcmJ9a6OHt2OeF4Co/NhC+SxBdJ5v2k3eswser5vfxs9Z7e2wLRFHeu3g3AlUun0RYYfqHAYuWymjAM2NEaRFEUmV6fJGQcF8dTYjdT4jARiCYpcw6/RkIynTn8ZZBIZUhlMhiAApg0BZOuYTNrVLgsOMw6FpOKWVN7/x1OYGDSVKaXOfDazOzvDNEaiOGymHAP4ZhVDCKJFIl0hlNqPBJUTEJ5DSw6OztJp9NUVVX1ub2qqoodO3YM6Tm++tWvUltby4UXXtjv/bfffju33HLLqPsqioPHbmJ2pYtth/xYdBWrafAMFBZd5VNvq+ee5/by5cc25XxGIGNkD1LxVIZ4Kp39N/mm/09liCff9P+pdO/9Jk3l2/8xnwfWHOj3ue9fs59rz59FZyhRMDMs48FtM2FEYUdrAAWoleBiwpNxXByPqirUem1sDvnIGAZqP3USUkcCh3SG1OEZCONw+KBrCmYtGyR47WacFh2LSesz85DLmRDIHq/mT/FQ6jCzvytMiz9KudOS89cZT8l0hp5IgjmVLimCN0nlNbAYrTvuuINHHnmE5557Dqu1/6UvN9xwAytXruz9PhAIUFd
XN15dFHlQ67ESjCY50BWm2m0bdLPzkRmBnz9z7IyAgcElp9Twty0tAwcFvYHAscFCLJUZVUXYeVUuOkNxAtH+1+EGoim6w0n8kSROa1H/KQ+bx2bCfzi4UBWFas/Qlr6J4iTjuBiKUocZpyU742zSshW5kxmjt86RpiqYNAWzpuGymnFZjwQP2cDBrGcDi/Es3qZrKtPKHHjsZg50hmnxx3Ba9CHNuBeaI0Xwar1SBG8yy+vZSHl5OZqm0dbW1uf2trY2qqurB33sD3/4Q+644w7+9a9/ceqppw7YzmKxYLFI1DyZKIpCfYWDcCJFZyg+4H4LTVXw2kwDzgg8sOYAn1s+i0dfb6Q7nBh1v0yagkU/ehB78wHNomtYTEf/32pSKbWbqXBZcNv0foMLt03HbdO59nfrcVp0ltaXckZ9KXOrXP1erZtoskvWDLa3+FEUhryvRhQfGcfFUFh0jVqPlYbuCJDdNO2wZJcwmY+Ms4eDh0LbTOyxvWn2ojNMsz9KRZHNXhwpgje70jnh9oyIoctrYGE2m1m8eDGrV6/msssuA7Kbt1evXs0XvvCFAR/3/e9/n1tvvZV//OMfLFmyZJx6K4qJRdeYXeFiU9yHL5LAaz82f7auZmtfDDYj4IskufCEStpD8cGDAl09HBgcvd3a2yZ7UBtJmthALMmKs+p791S82SfOmsHGhuz76w4naOiO8Ni6Jrx2E6fPKGVpfSkLpnqPuxysmHntZnoiCbY3Z5dFVUpwIcSkNq3MQbXHNuIxN580VaGu1I7XbsoGF74oToupKGYvgrEkmqIwp9KJ3Ty5ZtBFX3n/6a9cuZKrr76aJUuWcMYZZ/DTn/6UcDjMihUrALjqqquYMmUKt99+OwDf+973uOmmm3j44YeZMWMGra3ZqslOpxOn05m39yEKT3a/hZNtzX5iyfQxJ9ipjIHHZhp0RqDcaeYTZ9fnbQ+DL5zks8tnAtk9Ff3tAXnok2fy+sFuXj3QzbqDPfgiSZ5+o42n32jDrKksrPNyRn0pZ8wopWQCFigqsZvpCSfY3hIABSpdElwIMVlpqlL01Z1dVhMn13ooc1rY3xGi2RelwlW4sxfxVJpQPMWJNe4RbZwXE0veA4srrriCjo4ObrrpJlpbW1m4cCFPPfVU74buhoYGVPXoH9M999xDIpHg8ssv7/M8N998M9/61rfGs+uiCNR4rIRiKfZ1ho7Zb5HOGPiiA88IrDirHl8kmdeN0fFUhqbuKFcunca158/CH03hsen4IsnejeVOq8558yo5b14lyXSGN5oDvLK/i1f2d9MejPPqgWzQAdl9G2fUZ2czppVOnDWwJYcLHW5vCaCgUCGbBoUQRUxTs1nvPDYT+zuyG7ttJg2PzVRQ4/aRInjTy6QInsjKex2L8Sb5zyefRCrDlkM+usMJqt19B74jdSL+9/l9A84I5Dvl7BGaqqCrypDrbBiGwYGuSDaw2N/FrrZQn/ur3BaW1pdxRn0pJ9e4J8Sa2K5QHFVVOKnWTblcOSsYUsdCiJHLZAxaAzH2dYYJx5OUO6yYC6RAamsgSqnDzPwpHix6cc8UiYENZ8yVwEJMCv5oks1NPjA4Zr+FRVfx2k147aY+MwKFUMcil7rDCV470M0r+7vY1OgnkT763hxmjcXTs5u/F08vwWnJ+2TmiHWG4uiawkkyLV8wJLAQYvTC8VTv3gurruG153f24kgRvFOmeotiH4gYOQksBiEHpMmrxR9l6yE/Xpu53w3Nw50RKGaxZJqNjT5e3Z9dJuWPJnvv01SFk2vdh7NMlVE9xA3RhfT5dQTjmHWFk2qzWVZEfklgIURuZDIGbcEY+zrCBGNJyp2WvMwURBIpgrEU86d4JN33JCCBxSDkgDR5GYbB7rYQ+zrDVLutRZcxZKykMwa724K8sr+bVw5009gd6XP/9FL74X0ZZcypch6Tytaiq3gdJrw2U2/F80KY8WkPxrCYNE6ucU/ITevFRAILIXIrkkh
xoDPMIV8Us6ZRMo6zF8l0hvZgjNmVTmZVOAtqz4cYGxJYDEIOSJNbIpVhW7OfzlD8mP0WIqvZFz28L6Obbc1+3jz54LWbOONwKttTD09/Ty21ser5vTyw5kDB7VFpD8SwmjVOrnX3m3JYjA8JLITIPcMwaAvEOdAZxhdNUubofzY+16/Z7I9S67Vx0gTZmyeOTwKLQcgBSQRiSTY3+sgY2VSlYmDBWJJ1B3t4ZX82lW00me69z6yrPPifZ/DS3k5+tnrPMY+97oI5XLl0Gm2B+Hh2+RhtgRh2s8bJtR48dlkHnA8SWAgxdqKJNAe6QjT1xDCpCqUO85jNInQE49gtGqdO9Ui9iklkOGOu/FaIScdtNTG70sWWQ/3XtxBHuaymPqlstx7y8+rhJVPpjMEpUz18+sHX+33s/Wv2c+35s+gMJfK656LSZaE9GGd7S4C51S7ZcyGEmFBsZo0Tqt2UOSzsO1y1u8xhyfmxLRhLoipIETwxKPnNEJNSldtCMGZnb4fstxgqk6ayaFoJi6aV8Jm3zaQrHMcfGbxyuT+aQleVvAYWiqJQ6bLQGYqzsbGHqV47daX2oi+iJYQQRyiKQqXbittm4kBnmKaeKKF4ilKH+Zh9cSMRT6UJxpJSBE8clyyOE5OSoihML3NQ5bbQHozluztFR1EUqtw2ypxm3Lb+r0+4bToOi8avX9zPnvYQ+Vx1qSgKFS4rDrPOvs4Q6xt6aOqJkEpPnHTCQghhNWnMq3Zxap0Hm1mjxR8lmkgf/4GDSGcMOsNxppXZmVpiz1FPxUQlgYWYtMy6yuxKJ3azRk84ke/uFJ03Vy7vz9XLZvDi7k4eeb2R63+/kS/8bgN/XN9Edx4/a7tZp9ZjI5Mx2HoowKYmH12heF6DHiGEyKXsLK2VhXVeZpY7CcSTdATjZEY4znWEYlQ4LcyscKLK7L44DgksxKTmOrzfIpHOjPqqzmTkCyf57PKZXHfBnN6ZC7dN57oL5vD582YRiad525xyTJpCQ3eE+9ccYMUDr3LLX7bx4p5OEnnIGKUoCl67mWq3FV84yYZGHztaA4Tj/S/pEkKIYmQ1acypcrJgqhenNTt7EUkMb5zriSSwmTRmV7qksrYYEskKJSY9wzDY2xFiT3uIardN9lsM01Aql4fiKV7c3cnqHW3saA32PtZh0XjbnAouOKGKuVX5yYceS6bpCsdxWHSml9qp8dowSQrFnJKsUELkVzyVprErwsGeCBhQ5rAc91gnRfDEEZJudhByQBL9SaYzbDvkpy0Yp9Yj9S1GYqiVt5t6Ijyzo51nd7bTGTq6LGpqiY23n1DJ2+dV5mVzYCCaJBhPUua0ML3MToXTIoWfckQCCyEKQ2cozv7OMF2hBCV204DZnVKHi+DNkiJ4AgksBiUHJDGQYCzJliY/gWgSq0nDZtawmTQZUMdIOmOw5ZCf1dvbWLOvq3dZlKrAwjovbz+hijNnlo7r9Hs6Y9ATSZDKGNR4rEwrs+O2Su2L0ZLAQojCEU+laeyO0NAdwchAmbPv7IVhGLQEolR7bJwsRfAEElgMSg5IYjCBWBJ/JElnKE4gliSWSKMoCjaTht2sY9ZlgB0LkUSKF/d0snp7O2+0BHpvt5s1zp1dzgUnVnFCtWvcgrxEKkNnKI7VpDKt1E5tiU3WF4+CBBZCFJ7ucIJ9HSE6Q3G8NjMOS3b2QorgibeSwGIQckASQxVNpAnGkwSiSTpDCSKJFMm0ga4o2C06NpMm+zHGQLMvyjM723lmRzsdwaNVu2s9Vt5+YhXnz6ug0jU+631DsRT+WBKv3cSMMgeVLotkRRkBCSyEKEyJVIamnggNXRFSGQOrSSOVzjB/qodyqVchDpPAYhByQBIjkckYhBIpQrEUPeEEPdEk0XiatJHBZtKxmzUsuirLpnIoYxhsPeRn9fZ2Xtrb2bsRXAFOnerhghOrWDazbMwrp2cMg55wgng6Q7XbyvQyO167VO8eDgkshChsPeEE+zp
D9ESSzK10Mq3Mke8uiQIigcUg5IAkciGRyhCKpwjGssumgrEUsWQGXVWwmjTsZk0yC+VQJJFizd4uVm9vY2vz0aVSNpPGObPLueDESk6qcQ8Y2A11Y/lgkukMXeE4JlVlaqmNqSX2MQ9qJgoJLIQofMl0Bl8kSZnDLDOzog8JLAYhBySRa4ZhEE2mCcVS+CJJukIJwskUqUwGs5oNMmxmDVVmM3KiNRDj2R3trN7RRlvg6FKparc1m1XqhEqq3NmlUhZdxesw4bWZ8EeTeGymY1LhDlckkaInksBtNTGj3EGV2ypL4o5DAgshhCheElgMQg5IYqylMwahWIpgPBtkBGJJook0hpG9wm4za3KlOwcyhsEbzQFW72jjpT1dRJNHCxyeMsXD5Yun8sElU1n1/F4eWHOAQDSF26az4qx6Prt8Jk3d0REHF4aRrToeSaSocluZXuagxG6SpXADkMBCCCGKlwQWg5ADkhhvsWQ6u2wqmqQznCAcSxFPp1EVFfvhZVOSzm90Ysk0a/Z28cyONjY3+TGA+65azOYmPz9/Zs8x7a+7YA5XLp3WZ8ZjJFLpDF3hBIoKUzw2ppXZJYtKPySwEEKI4jWcMXfyHgHDYdD6uWqsaWC19m03EFUFm21kbSMRGCimUxSw20fWNhqFzCBXYR2OkbWNxSCdzk1buz3bb4B4HFKp3LS12bKfM0AiAclkbtparUd/V4bTNpmERAIrYFWg3K4ww2YmnNAIx1L0pBW649kT03Q8jtVIYz88m/HWK9+G2QL64T/XVAolMfAJsWEyg8k0/LbpNEo8NvB7000YZvPw22YyKLFobtpqOoblcKYSw0CJRgCwARdMc3DBtJl0BGK8frCbs+tcfPmxTb1tbcmjn8Mjz23nmjOq6YpFsnsuVA3jTX/3SmSwv+WjbXVNpVpPE0ukaWwM092uMq3UQZXHmt1jI2NE1mDvezRkHJdxfJzG8QFZ3jQ2D6dtKpX9LAZi7juOD7ltOp392Q3EZMq2H27bTCb7u5aLtrqe/Swg+zcRieSm7XD+7mWM6L/tQH/3wxnDjUnG7/cbgOHP/viO/brkkr4PsNv7bweGsXx537bl5QO3XbKkb9vp0wdue9JJfduedNLAbadP79t2yZKB25aX9227fPnAbe32vm0vuWTgtm/9Nbr88sHbhkJH21599eBt29uPtr3mmsHb7t9/tO1XvjJ4261bj7a9+ebB27766tG23//+4G2fffZo27vuGrztX/9qJFNpoyccNzp/vmrQtgfuecDY3OgzNjf6jAP3PDBo24Yf3d3bdv8Djw7atuk7P+htu/f3fxm0bfM3vt3bdvdfnhm0bev1X+1tu/Nfawdt2/7ZL/a23b5m06BtO6/6VG/bbRv3DNo2+tGPGdO/+ldj+lf/apxw/R8Gbbv7be80/vVGq7GpocfY3OgbtK3/7e/o7cPmRp+Rtg08RmRkjMh+ZmAAht/vN3JBxvE3kXE8K4/jeK/77x+87e9/f7Tt738/eNv77z/a9q9/HbztXXcdbfvss4O3/f73j7Z99dXB295889G2W7cO3vYrXznadv/+wdtec83Rtu3tg7e9+uqjbUOhwdtefrnRx2BtZYzIfg1xjBjOGD55ZyyEKAC6pmZTlx4nX7g/miAUS8oym2EwaSpum04gOsjV0cN2tgW59jevU2I3MbfKxaM56kMkkSITS+KS6t1CCCEmgcm7x6K5uf91YjI91n9bmUIffttRTqFnMgbhRIpwPEV3WqEnbmQ3gSeTKIkEJk3BpKlYdBWTpvamB5yMS6H6U1nq4KENbdy5ejcYfZdCXXPeLN63aAp/3niI3e1hdnSE2R1I9aaitSWy701VoK7UxuxKF3MqncytclFb6kS1H/1bHmjZVCKVpjOSxOJ2Mr3UTq3Xjjk+yHubwGNEIBDAU1ub+z0WMo7LOF7g4/iAbWUpVPb/DUOWQo2k7TiPEcMZwydvYCGb/kSRSaYzhOPZehnRRIp
gPEU4niaRSpNMG2QMAwMwa2qfgGOypkK16Nl6E//7/D7uX7P/uFmh4qk0ezvC7GwNsLMtxK62YJ/K30fYTBpzKp3Mq3Yxt8rFvCoXJY6BC+YFY0kCsSSlDjPTyxxUOCdf9W7ZvC2EEMVLskINQg5IYiLJZAziqQyxZJp46tiAI5EyyP6XDTjMmop5EgUcFl3FazfhtZvwR1N4bPqw6lh0hxPsbAuyqzXIzrYgu9uDxJLHPq7SZckGGdXZQGNmhQOLfnRT8ZHq3clMpjc9rcdWHMujookUmqoSPLykK5XJDHtJngQWQghRvCQrlBCThKoq2A4X4HuzAQOOWJpIItUbcEB2L8KRgMOsqxOqkF88laEtEKczlEBXFTqC8WFV3i51mFk2s4xlM8uAbI2Sxu4IO9uygcbO1iCN3RHag3Hag3Fe3NMJZCt915c7mFeVndU4odpFjcdKKmPQ6o/RHU5QV2JnaomNjGGM+sR9rMSTaVb1M+NzzXmzsEgtFiGEEG9RGEcvIUROjTTg8EUmZsCRzhjDCigGoqkKM8odzCh3cPHJ1UB2g/bu9hA7W4PsOhxw+CJJ9rSH2NMe4sktLQC4LDpzDgcZ00rtJFJparzWfpdqjceJu2EYGAa9S+gyh783DDDIfl6//Pf+7B6VwwLRVO/3n10+s2ACICGEEIVBjgpCTCLDCTgCsRSR+MQOOHLBbtZZMNXLgqleIHvC3h6MZ4OMw0uo9naECMZTrG/oYX1DD5At4HfPc3v7FPA7cuJuYPCB06ay/qCPVCZDxiD7bwbSb/6+z+3ZfTapjEEmY5DKZEgfbp82IJ02SBvZ2zJGNnAwDIO0kf35Z4zDXxlwWDR++MGF3L9mf7/v+f41+7n2/Nlj/tkKIYQoLgURWNx999384Ac/oLW1lQULFvDzn/+cM844Y8D2jz32GN/85jc5cOAAc+bM4Xvf+x6XXHLJOPZYiIllKAFHLJUtBPfWgCODgUJ2D8ebK4gfCTeOxB0KSp873nr/0ccp/d+uHOf+3nZ971De8nrjodJlodJl4ZzZ5UB24/2BrjA7W7ObwtsDMc6eXX60gN9bPLDmAJ9bPovvPPkG3eFBssyMkXlVLjpD8QFT9QaiKYKxJGXHSZMshBBicsl7YPHoo4+ycuVKVq1axdKlS/npT3/KxRdfzM6dO6msrDym/Zo1a/jIRz7C7bffznve8x4efvhhLrvsMtavX8/8+fPz8A6EmLgGCzhiqTTxZKZPwBFLprNVdI6sOjKO/HN0EzlG7829DY2+3/bOjrzlnz7P1+dxb3k8b1n1dMzz5oHLamLJjBKWzCjBadEJxlKDnrj3hJMsmublQGcYVVFQFPr8299tCvR/e3/t1KPtVSUbkB3512XVqXBZBqwD4rbpUptDCCHEMfKeFWrp0qWcfvrp3HXXXQBkMhnq6ur44he/yNe+9rVj2l9xxRWEw2H++te/9t525plnsnDhQlatWnVM+3g8TvxN+Z8DgQB1dXWSTUSIMXZkaDHeEhwYxwkm3joiDXT/8Z7vmCCkgPLfKYDdorPk1qcHPHF//RsXEYmn8hYKaarCr17su8fiiOsumDOsPRajzeIk47gQQuRP0WSFSiQSrFu3jhtuuKH3NlVVufDCC1m7dm2/j1m7di0rV67sc9vFF1/Mn/70p37b33777dxyyy0567MQYmiOLEk6dgvG5N6TcUQ0kWLFWfX9nrivOKueVCaDd5D6GOPhmvNmAeQ9K5SM40IIURzyGlh0dnaSTqepqqrqc3tVVRU7duzo9zGtra39tm9tbe23/Q033NAnEDlypUsIIfLJZtYL5sR9IBaTxmeXz+Ta82f3SYc73n2TcVwIIYpD3vdYjDWLxYLFIhsMhRCFp1BO3AdzZLnTkY3aZtTBmo8JGceFEKI45DWwKC8vR9M02tra+tze1tZGdXV1v4+prq4eVnshhChkhXDiLoQQQuRCXo9gZrOZxYsXs3r16t7bMpkMq1evZtm
yZf0+ZtmyZX3aAzz99NMDthdCCCGEEEKMvbwvhVq5ciVXX301S5Ys4YwzzuCnP/0p4XCYFStWAHDVVVcxZcoUbr/9dgCuu+46li9fzo9+9CPe/e5388gjj/D6669z77335vNtCCGEEEIIManlPbC44oor6Ojo4KabbqK1tZWFCxfy1FNP9W7QbmhoQFWPTqycddZZPPzww9x44418/etfZ86cOfzpT3+SGhZCCCGEEELkUd7rWIw3v9+P1+ulsbFR8p8LIcQ4OJLFyefz4fF4Rv18Mo4LIcT4Gc4YnvcZi/EWDAYBJFWhEEKMs2AwmJPAQsZxIYQYf0MZwyfdjEUmk6G5uRmXy9VbwGuojkRscpXsWPLZDEw+m4HJZ9O/ifa5GIZBMBiktra2z9LWkZJxPPfkcxmYfDYDk89mYBPpsxnOGD7pZixUVWXq1Kmjeg632130vyRjRT6bgclnMzD5bPo3kT6XXMxUHCHj+NiRz2Vg8tkMTD6bgU2Uz2aoY7gkTBdCCCGEEEKMmgQWQgghhBBCiFGTwGIYLBYLN998MxaLJd9dKTjy2QxMPpuByWfTP/lcxo58tv2Tz2Vg8tkMTD6bgU3Wz2bSbd4WQgghhBBC5J7MWAghhBBCCCFGTQILIYQQQgghxKhJYCGEEEIIIYQYNQkshBBCCCGEEKMmgYUQQgghhBBi1CSwEEIIIYQQQoyaBBZCCCGEEEKIUZPAQgghhBBCCDFqElgIIYQQQgghRk0CCyGEEEIIIcSoSWAhhBBCCCGEGDUJLIQQQgghhBCjJoGFEEIIIYQQYtQksBBCCCGEEEKMmgQWQgghhBBCiFGTwEKIAvGxj30Mq9XKrl27jrnvjjvuQFEU/vrXv/beFgwG+Z//+R/q6+uxWCxMmTKFyy+/nEgkMp7dFkIIwdDH8Oeeew5FUQb8uvXWW/PQeyFyQzEMw8h3J4QQ0N7ezgknnMDChQt55plnem/fv38/J598Mpdccgl/+MMfAPD7/SxfvpympiY+85nPMHv2bDo6Ovj3v//Ngw8+SElJSb7ehhBCTEpDHcPb2tp4+umnj3n8gw8+yD//+U9effVVTj/99PHsuhA5I4GFEAXkvvvu4zOf+QwPPPAAV199NQDvete7WLNmDW+88QZTpkwB4JprruF3v/sd69evp76+Pp9dFkIIcdhQx/D+zJkzB0VR+p3xEKJYyFIoIQrIpz71Kc4++2y+8pWv0NXVxSOPPMJTTz3Fd7/73d4Dks/n4/777+czn/kM9fX1JBIJ4vF4nnsuhBBiKGN4f1599VX27NnDlVdeOY69FSL3JLAQooAoisL//u//4vf7+fznP8/111/PkiVLuPbaa3vbvPjii8RiMWbPns3ll1+O3W7HZrNx9tlns3Hjxvx1XgghJrmhjOH9eeihhwAksBBFT5ZCCVGAvv71r3P77bejaRqvvvoqp512Wu99P/nJT1i5ciVlZWXMmjWL6667Dr/fzy233EIikWDbtm3U1NTksfdCCDG5DTaGv1U6nWbKlClMnz6dV155ZRx7KUTu6fnugBDiWOXl5QDU1tYyf/78PveFQiEge2Vs9erVOJ1OABYtWsSyZcu4++67+e53vzu+HRZCCNFrsDH8rVavXk1bWxtf//rXx6NrQowpWQolRIFpbGzk5ptvZv78+TQ2NvL973+/z/02mw2ASy+9tDeoADjzzDOpr69nzZo149pfIYQQRx1vDH+rhx56CE3TuOKKK8aph0KMHQkshCgwX/jCFwD4+9//zgc/+EFuvfVW9u3b13t/bW0tAFVVVcc8trKykp6envHpqBBCiGMcbwx/s2g0yuOPP86FF17Y75guRLGRwEKIAvL444/zxBNP8J3vfIepU6fy05/+FLPZ3Gfj3+LFiwE4dOjQMY9vbm6moqJi3PorhBDiqKGM4W/2xBNPEAwGZdO2mDAksBCiQASDQf7rv/6LRYsW8cUvfhHIzk585zvf4amnnuKxxx4DYN6
8eSxYsIA///nPdHZ29j7+n//8J42NjVx00UV56b8QQkxmQx3D3+zhhx/Gbrfzvve9b7y7K8SYkKxQQhSI6667jrvuuouXX365T9XVdDrNGWecQWtrKzt27MDlcvHss89y0UUXMXv2bD772c/i9/v58Y9/TE1NDevWreuz90IIIcTYG84YDtDd3U11dTUf+MAH+N3vfpevbguRUzJjIUQBWLduHXfffTfXXHNNnwMSgKZprFq1itbWVm688UYAzj//fJ566ilKSkr4+te/zs9//nMuu+wynn/+eQkqhBBinA13DAd47LHHSCaTfPSjHx3v7goxZmTGQgghhBBCCDFqMmMhhBBCCCGEGDUJLIQQQgghhBCjJoGFEEIIIYQQYtQksBBCCCGEEEKMmgQWQgghhBBCiFHT892B8ZbJZGhubsblcqEoSr67I4QQE55hGASDQWpra1HV0V/PknFcCCHGz3DG8EkXWDQ3N1NXV5fvbgghxKTT2NjI1KlTR/08Mo4LIcT4G8oYPukCiyMVLxsbG3G73XnujRBCTHyBQIC6urre8Xe0ZBwXQojxM5wxfNIFFkemzd1utxyQhBBiHOVq2ZKM40IIMf6GMobL5m0hhBBCCCHEqElgIYQQQgghhBg1CSyEEEIIIYQQo5bXPRYvvPACP/jBD1i3bh0tLS08/vjjXHbZZYM+5rnnnmPlypVs27aNuro6brzxRj7xiU+MS3+FEMUvnU6TTCbz3Y0JxWQyoWlavrshhJgEZAzPvVyO4XkNLMLhMAsWLOA///M/ef/733/c9vv37+fd7343n/vc53jooYdYvXo1n/rUp6ipqeHiiy8ehx4LIYqVYRi0trbi8/ny3ZUJyev1Ul1dLXUlhBBjQsbwsZWrMTyvgcW73vUu3vWudw25/apVq6ivr+dHP/oRACeeeCIvvvgiP/nJTySwEEIM6sgBqbKyErvdLifAOWIYBpFIhPb2dgBqamry3CMhxEQkY/jYyPUYXlTpZteuXcuFF17Y57aLL76YL33pSwM+Jh6PE4/He78PBAJj1T0hRIFKp9O9B6SysrJ8d2fCsdlsALS3t1NZWZnzZVEyjhcOwzCIJTPYzLL0TYwfGcPHVi7H8KLavN3a2kpVVVWf26qqqggEAkSj0X4fc/vtt+PxeHq/pFqrEJPPkfW4drs9zz2ZuI58tmOx9lnG8cKQSmfY3RZiU5OPUDyV7+6ISUTG8LGXqzG8qAKLkbjhhhvw+/29X42NjfnukhAiT2TqfOyM5Web63G8MxSnPRjLUe8mh1gyzRstAfZ1hugJJ2jsDmMYRr67JSYZGcPHTq4+26JaClVdXU1bW1uf29ra2nC73b3TOG9lsViwWCzj0T0hhBBjINfjuC+S4EBnhFkVKaaVOdBUOVkZTCCWZFdrkM5gnEq3lXTG4JAvRqXLSplTjq9CiKOKasZi2bJlrF69us9tTz/9NMuWLctTj4QQQhSjWDLNzrYQO1sDxJLpfHenYHWG4mxp8tMdTlDtsWHSVKwmDcWAg10RUulMvrsohCggeQ0sQqEQGzduZOPGjUA2nezGjRtpaGgAstPfV111VW/7z33uc+zbt4//+Z//YceOHfziF7/g97//Pddff30+ui+EEHl14MABFEXpHUOH4oEHHsDr9Y5Zn4qFxaRR6bLQ0B1h6yE//qjkxX8zwzA45IuytclPIpWh2m3tM7NT5rTQEYrTFowP8ixCiOOZaON4XgOL119/nUWLFrFo0SIAVq5cyaJFi7jpppsAaGlp6Q0yAOrr63nyySd5+umnWbBgAT/60Y/45S9/KalmhRBCDJtJU6l22+iJJNjS5KM9IPsuANIZg30dYd5oDqBrKuVOyzHrrzVVwWbSONgZlhkfIQqYYRgk0xliyTSZcdgXldc9Fuedd96gm78eeOCBfh+zYcOGMeyVEEKIyUJTFardNrrDCTYf8jM7kaKudPLuu4in0uxtD9HQHaHEbsZuHvg0wWsz0ey
P0tQdYXaVaxx7KYQ4nnTGIJ3JkEgbpDMGCqBrCuoYb4Avqj0WQgiRc+HwwF+x2NDbvjXldX9tRuCpp57inHPOwev1UlZWxnve8x727t3bb9vnnnsORVF48sknOfXUU7FarZx55pls3br1mLb/+Mc/OPHEE3E6nbzzne+kpaWl977XXnuNiy66iPLycjweD8uXL2f9+vUj6n+xKHWYcZp1drRO3n0XoXiKN5oDHOyKUOG0DhpUQDaLTIndTFNPFH9ElpKJPBnPMbzAx/Hm5mZS6QzRRJp/r3mZiy++mOlTqplWXc4l77hgXMZxCSyEEJOb0znw1wc+0LdtZeXAbd/1rr5tZ8w4ts0IhMNhVq5cyeuvv87q1atRVZX3ve99ZDIDb5r97//+b370ox/x2muvUVFRwaWXXtonN3kkEuGHP/whDz74IC+88AINDQ185Stf6b0/GAxy9dVX8+KLL/Lyyy8zZ84cLrnkEoLB4IjeQ7FwWHQqnNl9F9ua/QRik+dkuSecYOshP+2BODUeG2Z9aKcHdrNOMpPhYHeYTEbSz4o8GM8xvEDH8eeee56DDQ1cv/LLRBJpEukM4VCIj3384/zrmed49oUXmTlrNu99z3vGfBwvqnSzQggx2XzgLQfGX//611RUVPDGG2/gHOAgd/PNN3PRRRcB8Jvf/IapU6fy+OOP86EPfQjIFkBatWoVs2bNAuALX/gC3/72t3sf//a3v73P89177714vV6ef/553vOe9+TsvRUis57dd9EejBFN+plb6aTSbc13t8ZUqz/GzrYAqZRBjcc67Hz2ZQ4Lrf4YVW4rVRP8sxJiJMZqHL/r7l8wvX4myXSGT33mc3z/9tvQVAVFgfPffv7RJzPgZ3ffw7SaijEfxyWwEEJMbqHQwPdpWt/v29sHbqu+5QrvgQMj7tKb7d69m5tuuolXXnmFzs7O3itcDQ0NnHTSSf0+5s0puEtLS5k3bx7bt2/vvc1ut/cGFQA1NTW0v+m9tbW1ceONN/Lcc8/R3t5OOp0mEon0SaYxkWmqQo3HRlcozpZmP7OTaepK7KgTbN9FJmPQ0B1mT3sYs65S6R5ZTQqTpmLWVPZ3hvHaTVh07fgPEiJXCnwMh9yO4xkju3fCbrdTUzeDeCqNpihMmVJLR0c7R64LtLW18e1v3cS/n3+Bjo7xG8clsBBCTG4OR/7bDuLSSy9l+vTp3HfffdTW1pLJZJg/fz6JRGLEz2kymfp8ryhKn0QaV199NV1dXdx5551Mnz4di8XCsmXLRvWaxajMaSEUT7G9JUA4kWJWhXPCnDQnUhn2dYQ40BXGazPjsIzudKDEYabFH6XFF2VG+ciWiwgxIgU+hkPuxvFUOkMkniaeymAymQ7PTmQjCYW+4/hnPvmfdHd38f0f/Zhp06ahmcy84/y3jfk4LoGFEEIUqK6uLnbu3Ml9993HueeeC8CLL7543Me9/PLLTJs2DYCenh527drFiSeeOOTXfemll/jFL37BJZdcAkBjYyOdnZ0jeAfFz2nRMWsqDV0Rook0c6tcuKym4z+wgEUTaXa1BWnxxyh3WnISLKmKgttqoqE7SpnTUvSfkShMg2USLVSjGcfr6upIZQw6OrvYtWsXs+bOw8BAOxJMDDKJ+vLaNfzkZz/nne96FxhwoKFhXMZxCSyEEKJAlZSUUFZWxr333ktNTQ0NDQ187WtfO+7jvv3tb1NWVkZVVRXf+MY3KC8v57LLLhvy686ZM4cHH3yQJUuWEAgE+O///m9sNtso3klxe/O+i81JP3OqnFS6inMvgT+SZGdbgO5wgiqXFV3LXQ4Xl9VEsz9CY3eEE2vcw96rIUR/IokUncEYiVSGSDKNoqdRFQVVAVUd+/SpozXScfyWb38bu8tLeUUF3/nWTZSVZcdxTVVgCG951uzZ/O6hhzjttMUEAwG+fsPXxmUcl6xQQghRoFRV5ZFHHmHdunXMnz+f66+/nh/84AfHfdwdd9zBddddx+L
Fi2ltbeUvf/kLZrN5yK/7q1/9ip6eHk477TQ+/vGP81//9V9UVlaO5q0UvWy9CyvJVIath/wc7Cq+LEjtwRhbDvkIRFPUeGw5DSqOKLVbaPbF6ApPrmVzIncMwyAcT9Hsi7Kp0cdr+7vZ1RrIFnczDBKpDNFkmkgiTTieIhxPEU+mSaYzpDOZgpvVGOo4bhgGqXSGRCqb6vqmW77L/3zlepaffSbt7W089vjjwxrHf/G/9+Lz9XD2mWfwqf9cwWevuXZcxnHFKLSfwBgLBAJ4PB78fj9utzvf3RFCjINYLMb+/fupr6/Hai3OK81D8dxzz3H++efT09OD1+sd19ce7DPO9bg72ufb0x5kf2eE6hFmMArFUgRiCaaV2ZlZBPsuDMOgqSfC7vYQKgplzpFt0h6q9mAMr93EgqneMQlexMRjGAaheIpALEVnME5PJEEsmcasaTgsGlYlDaEOpk2fjuXI+GJAxsg+9kjSVgVQleyFAO3wbIaqZLMkFeoMWsYwSKWNw4GRwQsvPM+l77yIQ20duRvHDUhlDOwWDf2tm9QPy9UYLkuhhskwDPZ2hHBZTVS6LAX7iyqEEGJsOK06Jl3hYFeEWDLDnCoXzlFufh4rqXSG/Z1h9neGcVr0cdn7UOaw0BqI0RqIMbXEPuavJ4qTYRgE4yn8kSQdoRiBSIpYKoNFV3FadMocRwNgI5XhmKvgSjaIAAWt9zmzz5tMGyTS2UeoSnYP0NFAI/9LqAzDIH04oEilM6QNA4VsH/Uizz5XmCNhATMM6Agk2NcRZmqpjRlljuNWJxVCCDGxWHSNareNtmCMaCLNnCoXFa6xnQkYrlgyzZ72EE09EcocFqym8ZlZ0VQFh1njYFf2dW3mwp7REeMnk8kGE4FokrZAjEAsSSKdwappOCw6Zc7R/a4cmZl48zV5w8jOCqRTBgbZWQ3lyKyGovQGGeo4zGpkU8VmZydSaaN3I7ZpAs3syRnxCGWzhETpCSeZWeGgymWdcDnOhRDF5bzzziu49cWFyKypmLTRj9eaqlDjttIdTrD1kI9ZFU6mFki9i2Asyc7WIJ3BOJVu67ifuHhsJpr9UZp6Isypco3ra4vCks4YBGNJ/IeDiWA8RTKVwWbScVvHvu6JotCbRQnoXUKVThskMQADlWzaVk2lzxKqXP0tpzMGqUyGZDobWCjQJ1Xsm71t+XLC8eSxT1IkJLAYIbOuUuow44sk2dLkp6ckwfQyx6hzgQshxo6cdI+dYvhso4kUmqpiM+ssm1WGP5LEF0kST2WO/+ABKEp2z0IwlmR7S4BIIs3MCidmPX9XIDtDcXa1BgnFU1R7bNksMuNMURRK7GaaeiJUuCx47UPfdCqKXyqdIRhL4YskaAvGCcVSpDMGNpNGic084kDXOHZB1PAds4RK6Z3VSKYhkc6gkN2XcWQJlaYoKCq9AceQ+moYpDLZpU6pjEHGMFAVBdMQszqNt1yN4XIWPAqKolDiMJNIZWjojtAdSTKz3EG1W2YvhCgkRwrCRSKRSZ02dSxFIhHg2OJ7hSKeTLPq+X3cv2Y/gWgKt01nxVn1fHb5TJq6o6MKLiCbatWsq+zvChNLppmdh30XhmHQ7I+xuy2IYUC125rXfYB2s04wluJgVwSX1ZSXAEeMn2Q6QyCaDdY7QoeDCSOD3aRT6hh5MAGAqmMAsWgUqzX3Y3jfWQ0lO6vBm5dQZfdAHNmf0WdW4y1LqDJvmZ2A7GNMBf77n6sxXAKLHDDrKrUeG75okq2H/HSHE8wodxTsZj4hJhtN0/B6vbS3twNgt9sl8UKOGIZBJBKhvb0dr9eLphXeevpoIsWq5/dx5+rdvbcFoqne769cOo22QHzUr2PRNWrcNtoCMWLJDLOrnJSPcQamI9IZgwOdYfZ1hrGZNDy2wgjwSh1m2gIxqtxWqj0TNyPbZJVIZQjEkvSEE3QE44QTaTIZI7tfwmHOWVYwRVXBZKezowMAq82GMs6X/Q2Obg4/slt
DPbxhQ1eyy5qysxSZ3tmJI7Mb6XHtaf/SGQPVODYrVK7HcDnzzZEj077JdIamngi+SIL6CifVbqtcpRGiAFRXVwP0Bhcit7xeb+9nXGg0VeX+Nfv7ve/+Nfu59vxZdIYSvVcXR/daCjUeK13hBFub/MyqdDLFaxvTWex4Ks3e9hAN3VFK7KaCSihi0lTMmsqBrjAljrFfTy/GXjyVJhBN0R2O0xlKEI6ngOwMVYXTMmbnPKqjhEwY2ts7CmYlUTbYMHr/H7IF4grxwlXGMDDr6oBLuXI1hhfO6DNBmDSVKV47/miSrU0+urw26ssd45LiTwgxMEVRqKmpobKykmSyeDfGFSKTyVSQMxVHBGNJAtFUv/cFoin80RS6quQksIDs71r5W/Zd1Jc7xmTfRTieYldbkLZAjAqnNa97OwZS4jDT4o/R3BOlvsKZ7+6IEYgl0wSiSbrDCTpDcSKJNIoCDrNOpWt8LqAqioLmLMXIeCHT/9/zeCu88KF/6YxBTyTBKbWefs9HczmGS2AxRjw2E3azRos/hj+apL7cQU2eNtEJIY7SNK2gT4JF7rmsJtw2vd/gwm3T8dh0OoKjXwrV3+uaNJX9nWGiiRRzqlw5TfDRE06wsy2IP5Kk2l24xxdVUfBYTTT0RCh3WeRCW5GIJtIEYkk6g3G6I9mZCU1VcZp1qtymvNWBUFQVVEkGMBxKxiCjGpgtVqxj/PdXeJc2JhCTlt17oaCwrTnA1kN+AjG5UiqEEOMpncmw4qz6fu/7xFkz8EWSOZuteCurSaPabaUtEGdLk5+uUG4CmFZ/jC2H/IRjKWo8hb/k1mnViSUyNHZHiiKD2GQVSaRo8UfZcsjHawe62dDgo8Ufw6Sq1HhsVLutOK16XovLicImMxbj4MjsRVsghi+aoL7MQa3XlrNNTUIIIQZmM+tcc94sgD5Zoa5eNoP/PLue9hxs3B7MkX0XnaEEW5r8zK7K7rsYyTrsTMagoTvMno4wZk2l0l08G6LLnGaafTEqXNaCKyY4mUUTafzRJO3BGD3hBLFUGl1RcVp1vDZTQe4XEIVLAotxYtKy0X4gmuSNliA9kSQzyh0Fk7lDCCEmMotJ47PLZ3Lt+bMJRJO4rDrP7+rgA/esZcVZM1hQ5x3T11cUhQqXJXsMaA4Qiaepr3AMKwVnMp1hb3uIg11h3DZz0WUetOgaqpLiYFcYr900oaoNF5tYMhtMdAbjdIUTRJMpdFXFadEpsZslmBAjVlyj0gTgPjx70R6M4Ytk09JOkdkLIYQYc0eyJUUSKbY2+3liUzN7O0L89uUD/HDqgnE5mXLbsvUu9naEiCbTzK50DmnfRTSRZldbkBZ/jDKHBaupOPcJlTrMtAWjtPpj1JXa892dSSWRyuCLJugKHd2ArSkKLquO1zayGTQh3koCizzQNZVqt41gLMmO1gDd4QT15Q6pTCqEEOMgkc4Wr/rQkjr+tb2NXW0hXt7XxbJZ5ePy+kf2XbQGokSTaeZWuSh1DDz++6NJdrUF6QrFqXJZi/pClKYqOMwmDnZHKHOaCyo17kSUTGfwR5N0hxK0h+KEY0k0VcVh0anO4wZsMXEV7+g0AbisJqpcNrpCCTY2+jjQGSKZHl31VyGEEENTYjfz3gVTAHjwlYYx28DdH11TqXHbCMdTbG7yccgX7XdTc3swxpYmH/5IkhrPxJjddlt1QtEkTd3RfHdlQkqlM3SF4uxpD/La/m7WH+zhYHcYFaj22KhyW3FaZAO2GBvFP0IVOU1VqHJbsegaO1uDbG7y0RNO5LtbQggxKbxv0RScFp3G7gjP7Rzf4omKolDpsqKrKtsO+dnTfvTikmEYNHZH2HrITyptUOW2TpgTQUVRKHVYaPJF5HiXI+mMQU84wd72EK8f6GF9g4+9HWEMAypdVqrdNlxWmaEQY0/mIAuE06JjM2l0heNsavQxrczO1BJ7QRY7EkKIicJp0bl88VQeWHOAh19t4G1zK8Z9U7HHZsJ
yeN9FJJHd1N3mj7G/M4zTok/Iug82s0YglqShO4LbZir4dLmFKJMxCMZS9ETitAbihGIp0kYGh9k0phWwhRiMBBYFRFOzV6+OVFLtDieYWeEcdO2tEEKI0Xn3KTU8sbGZ9mCcp7a2cumC2nHvg9WkUeWy0uKPEoynCMWSlNot2MzFuUl7KEodZlr9USrdFmo8tnx3pygYhkEglsIfSdAWjOOPJkmnDWxmjVKHWTJtibyT38AClN1UZcMfSbKxsYc97UHiqXS+uyWEEBOS1aTx4TPqAPj9641EE/kZb/XDRVWNjEGlyzqhgwrIpmG3mnQOdkaIJeUYNxDDMAjGkjR2R1h3sId1B7t5oyVANJ6m1G6m1mujxC5BhSgM8ltYoDRVodJtxWHW2dMeZnOTn84cVWwVQgjR10UnVlHjseKLJnlic3Pe+qEoCt5JdJLotZvwRRMc6pGN3G8Viqc45IuysdHH6we62dbsJxRL4bGameK1UyIzFKIA5f038u6772bGjBlYrVaWLl3Kq6++Omj7n/70p8ybNw+bzUZdXR3XX389sVhsnHo7/uxmnWq3lUA0yaYmH3vaZPZCCCFyTddUPnrGNAAeX99EMJbMc48mB1VR8NjMNPVECMhnTiSRotkXZdPhYGJLkx9/JInTYmKK106Z0yJ7L0VBy+tv56OPPsrKlSu5+eabWb9+PQsWLODiiy+mvb3/zBwPP/wwX/va17j55pvZvn07v/rVr3j00Uf5+te/Ps49H19H9l44zTp7OkJsavTREYz3m5pQCCHEyLxtbgUzyuyEE2n+b31TvrszaTgtOrFUhoauyKQ8rsWSaVr9MTY3+Xhtfzdbmnx0hRI4zDpTvDbKnMVbEFFMPnkNLH784x/z6U9/mhUrVnDSSSexatUq7HY7v/71r/ttv2bNGs4++2w++tGPMmPGDN7xjnfwkY98ZNBZjng8TiAQ6PNVrOxmnRqPjVAszaYmH7vbQrIuVQgx4Y3XOK4qCh8/cwYAf9nUQpcsPx035Q4LLf4YHZPkM48l07QHYmw75OfV/d1sauqhIxDHatKo8diocEkwIYpT3gKLRCLBunXruPDCC492RlW58MILWbt2bb+POeuss1i3bl1vILFv3z7+9re/cckllwz4Orfffjsej6f3q66uLrdvZJypikKFy4LbYmJfZ4iNjT7ag7FJeZVHCDE5jOc4fvqMEk6sdpFIZ3j09cYxex3Rl1lX0RSFg12RCVso1jAMOkNxdrQEeP1ANxsafbT4Y5gPF0usdFuxm3UUqTUhiljeAovOzk7S6TRVVVV9bq+qqqK1tbXfx3z0ox/l29/+Nueccw4mk4lZs2Zx3nnnDboU6oYbbsDv9/d+NTZOjAOFzZy9qhFLpNnc6Gdna1BmL4QQE9J4juOKonDVshkA/PONNlr8sql4vJQ6zHSF4rT4Jt5nnkhl2NOevRjY1BNFU1Wq3Vaq3FYcFgkmxMRRVDuAnnvuOW677TZ+8YtfsH79ev74xz/y5JNP8p3vfGfAx1gsFtxud5+viUJVFMqcFjw2Ewe6wtnZi4DMXgghJpbxHsfnT/Fw2rQS0hmDh15pGNPXEkdpqoLTYqKhO0I4nsp3d3LGF0mw5VC2ErbbYqLKbcVp0aUKtpiQ8hZYlJeXo2kabW1tfW5va2ujurq638d885vf5OMf/zif+tSnOOWUU3jf+97Hbbfdxu23304mMzGnTofCatKoPTx7sanJz47WAKEJNCgLIcR4u2rZdABe2NXB/s5wnnszeXhsJkLxNI3dxb+RO50xaDh80a8nnKTaPfFrkwiRt8DCbDazePFiVq9e3XtbJpNh9erVLFu2rN/HRCIRVLVvlzUt+0da7APQaCmHZy+8NhMHuyKsP9jDgc6QpKYVQogRmFXh5JzZ5RjAgy8fyHd3JpUyh5lD/ig9keJNPxuOp9jW7OeNliBmTaXKbUVTZYZCTHx5XQq1cuVK7rvvPn7zm9+wfft2Pv/5zxM
Oh1mxYgUAV111FTfccENv+0svvZR77rmHRx55hP379/P000/zzW9+k0svvbQ3wJjsrCaNKV47uqqwozXEhgYfrf4Y6czkDryEEGK4PrZ0OqoCrx3o4Y2W4s0oWGysJg0MaOgKF92xyzAMWv0xNjb6aPZFqXRZcFlN+e6WEONGz+eLX3HFFXR0dHDTTTfR2trKwoULeeqpp3o3dDc0NPSZobjxxhtRFIUbb7yRQ4cOUVFRwaWXXsqtt96ar7dQsFxWEw6Lji+SZHOTj0qXhWllDkrsJtkkJoQQQzClxMaFJ1bxzzfa+O3aA9z+vlNk/BwnpXYzrYEYlYEYtV5bvrszJLFkmgOdYRp7olg0lVqPTX5fxKSjGJNsDVEgEMDj8eD3+0e0ATCTMXhlXzcGRtFchUilM3RHEhjAFK+VulIHTkteY0ohxCQy2nE318+3pz3I/s4I1W7rcdt2huJ85sHXSaYNvnXpySyeXjKSLosR6AknMOkKi6aVFHxNh65QnH2dYbpCccocUoNCFJ5QPMXJtW7ctuGfuw5nzC2qrFBiZHRNpdJlxWPNZtuQ/RdCCDE05U4L7z6lBoDfvnyAzOS6FpdXXrsJfzRJU08k310ZUCqdYX9HiM1NPgLRJDUemwQVomBYdJUqj4V51U5mVTiwmjQiibFN7iOXrSeRbPYoO6FYih2tIVoDcaaX2al0yaYyIYQYyOWL6/jHtjb2dYR5aU8n586pyHeXJgVFUfDazDT1RKlwWfGM4ErrWArEkuzvCNPij+KxmWUlgCgoFl1laqmNVc/v5YE1BwhEU7htOivOquea82ZhGaMAWGYsJiGnVafGYyWezLC50ceWJh9dofikz6wlhBD98dhMvG/RFAAeeqWh6DYUFzOHRSeRynCwK0ymQD73TMag2RdlU6OP1kCMKpdVggpRcLwOE6ue38vPVu8hEM3OUgSiKe5cvZtfPLd3zGYuJLCYpFRFodRhptJlpTOUYEOjj+2tAYKx4k3vJ4QQY+U/Ftbituoc8kX51/a24z9A5EyZw0JbIEZnKJ7vrhBNpNnRGmDLIT8YUOuxoWtyKiUKi6YqeG0mHlhzoN/771+zH10dm99b+WuY5PTD+bU9VhON3RHWN/SwvyNELCn7L4QQ4gi7WeeDS+oAeOS1BhKpyVuUdbyZdRVdVTnQFc7r594RjLOpyUdDV4Qyhxmv3Zy3vggxGF1V8EeTvTMVbxWIpsbsQrIEFgI4uv/CrGrsbAuxscFHiz8qU/5CCHHYJfNrKHda6Awl+NuWlnx3Z1IpsZvpDido8UfH/bUTqQx72oNsavIRTaSp9dqw6LJBWxSuVMbAbTXhtvW/RM9t08css6kEFqKPI/svEqkMmxp9bJb9F0KICUZTlBFldzLrKh85Iztr8ft1jWOeXUUcpakKLks2s2EoPn6fuz+SZOshP3vaQ7gsOuVOi9SmEAXNMAwe39DEC7s7uHrZjH7brDirnlRmbGb/JLAQx1AVhRKHmSqXle5Qgg0NPra3yP4LIcTE4LaZUBVlRDOyF5xQxRSvjWAsxZ82HBqD3omBuG0mwvEUjd3hMb/Ylc4YNHZH2NjUQ3c4QbXbht0sG7RFYQvHU9z+9x3c9+/93PH3nXzynHquu2BO78yF26Zz3QVzuOa8WWP2+yx/JWJAuqZS6bYSS6Zp7I7SEYpTV2Kn1it5uoUQxctjM+Gy6oTiqWGnMNVUhY+dOZ3vPbWDP21s5t2n1hZcGtSJrMxh4ZAvRqXLSpnTMiavEUmk2NcRpqkngstiotQtP19R+Pa0h/jeUztoDcTQVYULT6ykzR/jyqXTuPb8WfiiSbw2M6lMZsxSzYLMWIghsJo0ar02zJrGrrYgGxt8NPuipNKyeVEIUXx0TaXKZSEUH9ks7FmzyphV4SCaTPPY64057p0YjNWkoRjQ0B3J+THIMAzaAjE2Nfpo6olQ6bKOqEqxEOPJMAz+vrWF//7DJloDMSpdFr73gVN5z6m1JNI
GbYE4O1tD7G0PE0umx3zmTQILMWROi06tx0YilWHLIT+bm/x0yv4LIUQRKnGY0TWV5AhOTlVF4aozZwDwt60tdATznwZ1Mil1mGkPxmnL4eceT6XZ3RZiS5OfeDJDrceGSdLIigIXSaT44T938Yvn9pLKGCytL+XOKxYxt8rVp106YxCOpxiPszX5qxHDohzef1HptNATTrDx8P6LgOy/EEIUEbfVhMdmIhgb2UbgRdO8zK91k0wb/O61hhz3TgxG11RsJo2DneGcpEbvCSfY0uRnX2cIj81EmWzQFkXgQGeYlb/fxAu7O1AV+M+zZ/CNS07Eac3vLgcJLMSIHNl/4bWbaOyOsuFgD3vbpf6FEKI4qKpCtdtKNDmywEJRlN6MK6u3t9HUE8lh78TxeG0mArHkqD73VDpb0XtTow9fJEm1W/YPiuLwrzfa+PJjmzjki1LuNHPH+0/lfYumFkRALIGFGBWLfnj/ha6xuz3IhoYeDsn+CyFEEfDYTVh0dcQXRE6ocXPGjFIyBvy/V2TWYjwpikKJ3UxTdxR/ZPgz5qF4im3NAba3BLDoGlVuK5qa/5MyMTBNVbDo6qT+OcWSaX7yr13c+cxuEukMp00r4adXLOLEGne+u9ZLskKJnHBadBxmDV80yZYmP60uM9PLHJQ5zAURQQshxFu5LDpeuxlfJDniK9UfP3M6rx3o5qU9nexpDzG70pnjXoqB2M06gViSg91h5ls9qEM44TQMg9ZAjL0dYcKxFJUuq+ylKHAWXcXrMOG1mfBHk3hsJnyRJL5IkngeK7GPt8buCHc8tYOG7giqAlcunc7li6eiFtg5lgQWImeOXEFyWw26wnF6wj5qPFbqyuy4x6jCoxBCjJSiKFS6rbQFYhiGMaKLIDPKHSyfW8Fzuzp48OUD3PLe+WPQUzGQMoeFVn+MKreVKrd10LaxZJr9nSGaeqK9s+2isFl0lamlNlY9v5cH1hwgEE3htumsOKuezy6fSVN3dFIEF8/tbOfu5/YQS2YosZv473fM45Sp3nx3q18SWIic01SFSpeVeCpNk+9o/YspJbJ+VQhRWErsJmwmjego0jB+dOk0/r2nk/UNPrYc8nPKFE+OeykGYtJUzJrK/s4wXrsJi97/MaYzFGdfR4jucIJyp2XAdqKweB0mVj2/l5+t3tN7WyCa4s7VuwG4cuk02gITNytbIpXh3n/v4x/bWgE4daqHr7xjHiV2c557NjCZ/xNjxqJr1HpsWHWN3R1B1h+U/RdCiMJiN+uUOs2ERpgdCqDGY+MdJ1UB8Nu1ByQF9zgrcZjxRRK0+KLH3JdMZ9jXEWJzk49QLE2NxyZBRZHQVAWvzcQDaw70e//9a/bjtZsm7J6LZl+U//7DJv6xrRUF+PDpdXz7vfMLOqgACSzEOHBYdGrdNtIZgy1NfvZ3hvPdJSGE6FXpspLMGGRGERB8+PRpmHWVHa1BXjvQncPeieNRFQW31URDd5RQ/GiAGIgl2XbIz+62IHaTToXLUnDr0cXAdFXBH00SiPYf9AeiKbrDSeITMBvli3s6+dKjG9nXGcZjM3HLe0/myqXTiyKIksBCjAtFUfDazbitOu3B+IiKUgkhxFjw2Ew4LRqRxMhPUEodZi49tRaAB18+OKogRQyfy2oimkzR0B0mnTE45IuyscFHezBOlduGwyIrv4tNPJXBaTHhtvX/s3PbdNw2nc/9v/X87Jnd7O0IjXMPcy+ZzvC/L+zle0/tIJpMc1KNmzuvWMiiaSX57tqQSWAhxpXdrBNJpAhEpaCeEKIwWE0a5U4LofjoxqXLT5uKw6xxoCvCC7s6ctQ7MVSldgvNPTG2HvKz7ZAfVVGo8diK4iqv6CudMfjhP3fy4p6O3noxb/WJs2aw7kAPbcEYT7/Rxpce3cj//N9mXtjVUZQXL9sCMb76f5v56+YWAD5w2lRue98plDktee7Z8EgIL8aVpipkMhCIJovuj0UIMXGVOS00dEdIZ4wRn4g
6rTrvP20qD758kIdeaeDs2eUTLpWppiroqkIqY5DOFNasjNWkEY6naA1EKXdYMesT67OfLNIZgx8/vYsXdnfQ4o/yf58/C1VRuH/N/mOyQjV2Rfj+B07lr5ubeWlvF9tbsrVJSuwm3nlyNRefXF0U5xqv7O/iJ//aRTiexmnRWXnRXE6fUZrvbo2IBBZi3NnMGm3BONPLHEPKOy6EEGPNazfhtOiE4ik8tpGnx37vglr+sqmZ1kD2Kuolp9TksJf5Uyy1BIrhJFIMLBtU7OSF3Z1oqsLli+to8cW4cuk0rj1/Fv5oCo9NxxdJ0tQdJZE2OLHGzYk1bj4ZTvCPba38fWsLPZEkv3utkd+va+LsWWW8+9RaTqx2FVxdrVQ6w29fPsjjGw4BMK/Kxf9cPI/K46ROLmQSWIhx57To9EQTBEd5ABdCiFwxaSqVbgt728OjGpesJo0PLanj3n/v49HXGnn7CZVFn2ZbagmI8XB0+VMnuqrw1XeewJkzy4inMrQF4nSGEuiqQkcw3u9sWanDzEfOmMbli6fy8r4u/rK5he0tAV7Y3ckLuzuZWe7g3afW8LY5FQXxN9kZivP9p3awvTUIZC9KfOKsGUU/yymBhRh3Jk0lmcoQOHzVSwghCkGpw8IBNUIqnUEfxcH9nfOr+dPGQ7QH4/x1cwuXL56aw16Ov8leS0CMvVQ6ww+f3sVLh4OKG951AmfUl/Vpkx7i8juTpnLunArOnVPB3o4QT25p4fmdHezrDPPzZ/bwwEsHuOikKt51Sg3VeZoZWHewhx89vZNgLIXDrHHdBXNYNqs8L33JteIOi0TRsuoanaG45HsXQhQMj82E06oTjI+8pgVkT2w+esY0AP5vfVOfFKjFZrLXEhBjL5XO8IN/7nxTUHHiMUHFSM2qcPJfb5/DAytOZ8VZM6h0WQjGU/xxwyE+89vX+c5f32B9Q8+4ZXFLZwwefPkg3/rLNoKxFLMqHPzkioUTJqgAmbEQeeKw6PijSSKJtKQBFEIUBE1VqHZb2NEaosQ+uuc6b14l/7fhEI3dEf64vomrBshsU+h0VaE7PHgtAX80ha4qBbeZWxS+ZDrDD/6xk7X7utBVhW9cciJLxmDTsstq4v2nTeU/Fk5h3cEe/rq5mQ2NPl490M2rB7qZ4rVxySk1XHhiJXbz2JyTdIcT/PCfO9lyyA/AJafU8Mmz6ydckoGJ9W5E0bCaNGLJNIGYpJ0VQhSOEocFk66QGOWeAU1V+PjS7KzFE5ua6QknctG9cdUTSXDn6t299QL647bpeGw6KQkqxDAl0xm+/48drN3XhUlTuPHdJ41JUPFmmqpwRn0p3/6P+dxz5WlcemoNNpPGIV+U+/69j0/c/xr3PL+Xhu5ITl93U5OP6x7dwJZDfmwmjf9+xzw+v3zWhAsqoAACi7vvvpsZM2ZgtVpZunQpr7766qDtfT4f1157LTU1NVgsFubOncvf/va3ceqtyCWTqtIdKr6DrRBi4nJZdDxWE8EcXPQ4c2YZc6ucxFMZfv96Yw56Nz4yhsE/trVyzUPreXJLCy/t6eQTA8y4rDirHl8kKbMVYliS6Qzfe2oHL+/rzgYVl5zE4unjWwRuaomdz7xtFg+sOJ3PLZ9FXYmNaDLN37a0cO3D6/nGn7awdm/nqH630xmD373awDf/tBVfJMmMMjs//tAC3ja3IofvpLDkdQ3Ko48+ysqVK1m1ahVLly7lpz/9KRdffDE7d+6ksrLymPaJRIKLLrqIyspK/vCHPzBlyhQOHjyI1+sd/86LUXNYdLojCWLJdEFkaBBCCFVVqPZY2RYKjPq5FEXhqjNncOOft/LUtlb+Y9GUvG0WHarG7gh3P7eHbc3Z9z+zwgEGfO68WShvqSVw9bIZfPptM2nuiea516KYJNMZ7vj7Dl490I1ZU/nGu0/ktDxWlrabdd59Sg2XzK9m8yE/T25u4ZX9XWxu8rO5yU+Fy8K75lfzjpOqB0w40199F18kwY+
e3sXGRh8AF51UxWfOnTnhz3cUI4+7Z5cuXcrpp5/OXXfdBUAmk6Guro4vfvGLfO1rXzum/apVq/jBD37Ajh07MJlGlk0oEAjg8Xjw+/243e5hPz6TMXhlXzcGBi6rZDQajYxh0BaIsWhaCRUuyT0uxEQ12nF3rJ/vrULxFK/v78Zh0XNyEnDjn7awqcnP2+dVcv1Fc3PQw9xLpDI8tq6RP6xrIpUxsOgqH1s6nUsX1KKpSraOhd2E127CH03htOi8sLuDR19r5LoL5qAWWH0AUZiS6Qy3/W07rx/swaypfPM9J7Gwzpvvbh2jPRjjqa2t/GNbK4FYdn+RSVM4d04F7zmlhjlVLmDg+i6vH+jmxj9vozucwKKrXHPeLN5+QlXe3k86Y9AZinN6femIsnEOZ8wd1YxFIpFg//79zJo1C10f3lMlEgnWrVvHDTfc0HubqqpceOGFrF27tt/HPPHEEyxbtoxrr72WP//5z1RUVPDRj36Ur371q2ha/4N/PB4nHj+aBi8QGP1VKJEbqqKgKNmoXgILIcRAxnscd5g1ShxmukKJnAQWVy2bwZcf28SzO9t5/2lTmF7myEEvc2dLk4+7n9vLIV925mHJ9BI+t3wWVW+aXXlrLYE97UGuf3QjkUSa+bVuLjqpOl/dF0Uikcpw+98PBxW6yk3vPokFBRhUAFS6rFy1bAYfPn0a/97dwV83t7CnI8QzO9p5Zkc786pcfPzMabx/8dRj6rt84qwZXL1sBiV2Mw6zxlffeULB/c2PpRHtsYhEInzyk5/Ebrdz8skn09DQAMAXv/hF7rjjjiE9R2dnJ+l0mqqqvhFcVVUVra2t/T5m3759/OEPfyCdTvO3v/2Nb37zm/zoRz/iu9/97oCvc/vtt+PxeHq/6urqhvguxXiwmXQ6gnFSaSmuJITo33iP44qiUOm2kMykc5ISe26Vi2UzyzCA//fKwdF3MEcC0SR3rt7F1/+0lUO+KCV2E/9z8Txues9JfYKKN0tnDOKpDE6LiQ+fnv05/GbtwaJOqSvGXiKV4da/HQ0qbn5P4QYVb2bWVS44sYoff2gBP7x8AefNq0BXFXa2BanyWLnnuWx9lyNZ0wLRFD9bvYcH1hzgjvefwo8+uHBSBRUwwsDihhtuYNOmTTz33HNYrUcHnwsvvJBHH300Z517q0wmQ2VlJffeey+LFy/miiuu4Bvf+AarVq0atK9+v7/3q7GxeDbQTQYOs0YkkSYYk4OSEKJ/+RjHvTYzNpNOJJHOyfN97MzpqAq8vK+bnYcr7eaLYRg8u7Odax5ez7+2twPwzpOr+cWVizl3TgXKEJc1vefUWqZ4bfijSR55tWEsuyyKWDyV5rtPZutFWHSVb73nJE6d6s13t4ZFURTmVbv48kXz+PUnTucz59Zz9uxyfrP2QL/tf7P2AAvqPDitky+d/ogCiz/96U/cddddnHPOOX0GoJNPPpm9e/cO6TnKy8vRNI22trY+t7e1tVFd3f+Uak1NDXPnzu2z7OnEE0+ktbWVRKL/7EIWiwW3293nSxQOXVNJZTL4o5IdSgjRv3yM4zazRrnTPOpieUdMK7Vz3rxsUpLfvnwgJ885Ei3+KDc9sY0fP70LfzRJXamd733gVK49fzbOYdYUMmkqnz53JgB/3dJCY45TdIrilw0qtrOh0YfVpPKtS0/mlCILKt6qxG7m8sV1hOKpIdV3mWxGFFh0dHT0m7UpHA4P+UqH2Wxm8eLFrF69uve2TCbD6tWrWbZsWb+POfvss9mzZw+ZzNFlM7t27aKmpgaz2TzMdyEKhd2k0xFKkJF0hUKIAlLutJDJZHJWlfejZ0xDVxU2N/l7M8WMl1Q6uzn7Cw9vYGOjD5Om8LEzp3PnFQs5qWbkgdri6SUsrS8lnTG479/7crJ0TEwMsWQ2qNj4pqBi/hRPvruVE6mMgddmkvou/RhRYLFkyRKefPLJ3u+PBBO//OUvBwwK+rNy5Uruu+8+fvOb37B9+3Y
+//nPEw6HWbFiBQBXXXVVn83dn//85+nu7ua6665j165dPPnkk9x2221ce+21I3kbokA4LDrBWJJQQpZDCSEKh8duwmE1Ec7RrEWV28o752dn5H+79sC4nYTvaA1w/e838tu1B0mkM5w61cNdHzmNK5bUYdJGX87qk+fUo6sKGxp9vLK/Owc9FsUulkzznSffYGOjD5tJ41uXnszJtRMjqIDsXiNfNMmKs+r7vX8y13cZ0eKv2267jXe961288cYbpFIp7rzzTt544w3WrFnD888/P+TnueKKK+jo6OCmm26itbWVhQsX8tRTT/Vu6G5oaEBVjw56dXV1/OMf/+D666/n1FNPZcqUKVx33XV89atfHcnbEAXCrKskUwaBaBK3pPAVQhQIi65R6bRwoCucs/TiH1pSx7+2t7G7PcTafV2cNas8J8/bn3A8xW9fPsjft7RgAC6rzqfOqef8eZVDXl0wFDUeG+9bNIXH1jXxyxf3cdq0kglZUVgMTSyZ5jt/fYPNh6tMf+u9J49qVqxQ+cJJPrs8uxTwzfVdVpxVz2eXz6Spe3LWdxlxHYu9e/dyxx13sGnTJkKhEKeddhpf/epXOeWUU3Ldx5zKRR2LLU1+4qk0NvPk25QzVjpDcUod5qLIEiGEGJ5iq2PxZp2hOBsaeqhwWtFytF76wZcP8vvXG6krsfHzj5yWs+c9wjAM1uzt4t4X9tEdye5fu+CESlacXT+iHPZDEU2k+dxD6+gOJ/jYmdO5YolkYJyMYsk0t/xlG1ubA9hMGre892ROnIBBxRFvre/isen4Ikl8kSTxVOFkuyyKOhazZs3ivvvuG+nDi1I0kUJTVWq8Vjx2E/4C/OUpVnazhi+SIJJIYZeATQhRILw2Ey6LiVA8lbOT8vctmsLftrTQ2BPl2Z3tXHhi7gpntQdjrHp+L68d6AGg1mPl2vNnj3kWHptZY8VZM/jR07t47PVGLjihknKn1CeaTKKJNLf8dRvbmgPYzdmg4oTqiRtUwLH1XTqC8Um5/OnNRnQGd6RuxUCmTZs2os4Usngyzarn9w043SXBxejYTBo9kSTBmAQWQojCoWsqlW4Lu9uDOQssnBadyxdP5YE1B3j41QaWz60Y9V6HdMbgL5ubeeiVg8SSGXRV4QOLp/KhxXXjtixp+dwK/ra1le0tAe5/6QD/ffG8cXldkX+RRIpb/vIGb7Rkg4pvv3c+86pd+e7WuElnjEkfUBwxojO4GTNmDLo+M53OTd7vQhFNpFj1/D7uXL2797ZANNX7/ZVLp9EWiA/0cDEEiqKgqQpd4fiAhZmEECIfSh1mNEUlmc7kZLMzwLtPqeGJjc10BOP8fWsr711QO+Ln2tMe4q5nd7O3IwzASTVurj1/NtNK7Tnp61ApisJnzp3Jyt9v5IXdHVxySvWE2rAr+hdJpPjWX95ge0sAh1nj2/8xn7lVkyeoEH2NaITcsGED69ev7/165ZVXWLVqFXPnzuWxxx7LdR/zTlNV7l+zv9/77l+zH6/dlPM1spOR06zTHU4ST02swFQIUdzcVhMeu4lQDgt5Wk0aHz4juw/h9683Eh1BIb5oIs0v/72PLz+2kb0dYRwWjS+cP5vb33/KuAcVR8yudPKOk7OZr/73hX1yFXeCiyRSfOuJbdmgwqLxHQkqJr0RzVgsWLDgmNuWLFlCbW0tP/jBD3j/+98/6o4VkmAsOWgRlO5wElUBOR0eHZtZoz0YIxhLYXFqx3+AEEKMA1VVqHZb2dbsp4Tc1Uy66MQqHt9wiBZ/jCc2HeKK04e+jPjV/d2semEvHcHsbPnb5pTzqXNmUuLIf02nj585nRd3d7C/M8w/32jlXfNr8t0lMQbC8RQ3P7GNnW1BnBad7/zHfGZXOvPdLZFnOV14OW/ePF577bVcPmVBcFkHL4Litulc/+hGntnRJldnRkFTFQzAF5Eq3EKIwuKxm7Doak5nVHVN5aNnZIOJP244RCCaPO5jusMJ7vj7dr7
z5Bt0BONUuizcfOlJ/PfFJxREUAHgsZn46NLpQDYDVjB2/PcliosEFWIgIwosAoFAny+/38+OHTu48cYbmTNnTq77mHfpTGbAIiifOGsGa/d2sb01yE/+tZvPP7ROAoxRcJh1OkMJ+fyEEAXFZdHx2s0Ec7gcCuBtcyuYUWYnkkjzf+ubBmyXMQz+tqWFzz+0jpf2dqEq8P5FU7j7o6exZHppTvuUC5fMr2ZaqZ1gLMXDrwye8EUUl1A8xU1PbGVnWxCXRee7l0lQIY4aUWDh9XopKSnp/SotLeWkk05i7dq13HPPPbnuY97ZzDrXnDeL6y6Y0ztz4bbpXHfBHD63fBYVTgufOGsGbqtOiz/GT/61m2seWsczO9rlBHmY7GaNUCwlV7iEEAVFURQq3VYS6dxmAFQVhY+fOQOAv25uIRBNYNHVPvv2DnaF+er/beae5/cSSaSZU+nkJx9ayIqz67GaCnPZqK6pfObcbPGwv21t4UBnOM89ErkQiqX45p+3sqsthMuqc+v75jOrQoIKcdSICuS9tbq2qqpUVFQwe/ZsdL2wU4WOprBSJJFCV1X80SRum35MHYtoIs2TW1r444am3qtatR4rV5w+jeVzK2SD9xAd8kU5udZNXZ42HwohcquYC+S9WTie4vUD3VhNWk7TYhuGwV3P7OaDp9dxzpwKQrEkHpuJ7nCCB9ceZNXhTdA2k8bHz5zOJafUFM3x5La/bWftvi5OneLhu5fNz2nFbzG+jgQVezpCuK06373sFOrLHfnulhiC8SyQN+LK28VqPCpv9xdgTPHauOL0Ot42RwKM4+kOJ3BYNBZPL5GDkBATwEQJLAA2N/noCMSpzGFabIuuUuu18b8v7OU3aw/01kq6etkMPnHWDD70vy9T4TLz2bfNKrqic22BGNc8tJ5EOsPX3nkCZ88uz3eXxAgEY0m++eet7O0I47bq3HrZKcyQoKJoFGTl7SeeeGLIHXjve9875LbFKJJIYzBwPGYza1y+eCqXnFLNk1taeHzDIQ75ovz46V08+lqjBBjHYTdrBGNJQvEULmtuClIJIUQuVLqsNPtiGIaRswsfXoeJe/+9l58/s6f3tkA01fv9qo+dRixZnEVYq9xW3n/aFB55rZFfvbSfxdNLCnb5luhfMJbkxj9vZV9HGI/NxK2XzWd6mQQVon9DDiwuu+yyIbVTFGXCFcgbKbtZ54OL63j3KTXZAGN93wDjw6fXca4EGMewmjS6IwmCMQkshBCFxWs34bBohBNpnJbRL4fSVAWvzcQDaw70e/9v1h7gi2+fzZ72cNHu2fvAaVP51/Z2OoJxHt9wiI+cMfS0uiK/AtHsTMW+zjBem4nvSlAhjmPIm7czmcyQviSoONaRAOOXVy/hqjOn47LoHPJF+dHTu7j24fU8t1M2eb+Vriq9+dmFEKJQWE0a5U4z4XhuskPpqoI/OnitJH80hV7EF6CsJo3/PHsGAH9Y10R7IJbfDokh8UcPz1R0hvHaTdz6vlMkqBDHldM6FmJwdrPOB5dkA4yPS4AxKKdFxx9NEktKoCqEKCzlTitpw8jJeJ3KGHhsg9dK8th0UkV+bDhndjnza90k0hl+/dL+fHdH9ENTld6MZP5okhv/tIX9h4OK2y7LXzV3UVxGPI8bDod5/vnnaWhoIJHoW9Dsv/7rv0bdsYnMbtb50JI63nNqDX/dfHQPxo+e3sWjrzfy4dOncc7s8km9RMpm0vBFkwSiSVmPK4QoKB6bCadFJxxP4R7BRsg3S2cMfNEkK86q587Vu4+5f8VZ9fgiyaK/6KQoCp952yy+9OgGXtrbxeYmH6dO9ea7W4Js8gCvw4TXZjqc9dJE5744mqpScnimoq5EggoxNCMKLDZs2MAll1xCJBIhHA5TWlpKZ2cndrudyspKCSyG6M0Bxl82t/CnDYdo6onyw3/u5JHXGiZ1gKEoCpqi0B1O5DT7ihBCjJZZV6lyW7IZckYZWAD4wkk+uzxb8+H
+Nft7s0KtOKuezy6fSVN3dNSvUQjqyx28c34Nf9vSwr0v7OPODy+alMe3QmLRVaaW2lj1/F4eWNM3I9ljn1vG1kN+SuyFUdFdFIcRLYW6/vrrufTSS+np6cFms/Hyyy9z8OBBFi9ezA9/+MNc93HCs5t1rlhSx6+uXsLHzpyO06L3Bhhf/N16XtjVUfRXq0bCbtboCidI5rgglRBCjFapw4KmKKRyMD7FUxmauqNcuXQar3/jQl77xoW8/o0LuXLpNJq6o721kiaCK8+Yhsuic7A7wt+3tuS7O5Oe12Fi1fN7+dnqPb37fI5kJHtgzX5OqHbluYei2IwosNi4cSNf/vKXUVUVTdOIx+PU1dXx/e9/n69//eu57uOkcSTA+OVVS/jY0mk4LBqNPVF+MEkDDLtZJ5JIEYhKFW4hRGHx2Ew4rTqhHG3ijqcytAXi7GkP0xNOsKc9TFsgPqGCCgC3zcTHzpwOwP975SB+Gd/zRlMVPINkJHtgzQG8dpPMKolhGVFgYTKZUNXsQysrK2loaADA4/HQ2NiYu95NUg6LzhWnT+NXV51+bIDxyAb+vXtyBBiaqpDJIIGFEKLgaKpCtdtCOJHbBBPpjEE8lZnQY/zFJ1dTX+4gHE/z0CsH892dSSWSSPHq/i7ufWEv33tqB+2B+ITOSCbG34j2WCxatIjXXnuNOXPmsHz5cm666SY6Ozt58MEHmT9/fq77OGkdCTDec2otf9nczJ82HqKxO8L3/7GTutJGPnJ6HWfPLkd9U5EmTVXQVYVUJjcZS/LNZtZoC8aZXuZAlcFNCFFASuwWTHqYRCqDWZcki0OlqQqfOXcmNzy+hae2tnLxydXMqnDmu1sTUjKdYVdbkI2NPjY1+tjZFuTIqUGpw0yZ04zbpvcbXBzJSCap38VwjCiwuO222wgGgwDceuutXHXVVXz+859nzpw5/PrXv85pB0U2wPjw6dO49NRantjUzJ/fFGBMO1xo7+0nVFLqNPdmdfDYTPgiSXyRZFFPpTstOj3RBMF4akRl6IUQYqy4rDoeq4lgLEmZ05Lv7hSV+VM8nDunnH/v7uTeF/Zxx/tPyVkl88nMMAwOdEXY1OhjU5OPrc3+Y6q213isLKzzsmCqF39k4mckE+NrRIHFkiVLev+/srKSp556KmcdEgNzWHQ+csY0Ll1Qy18OBxgN3RH+b322kun9a/b3yerw5owixRpcmDSVZCpD4HCwJIQQhUJVFarcVjpD/nx3pSitOKueV/Z380ZLgBd2d7J8bkW+u1SU2oMxNjX62NjoZ3OTD99blg97bCYWTPWysM7DgqnePpkWg7HUpMhIJsbPiAKL7373u1x55ZXU19fnuj9iCJxvCTAuOLGSX7+0n58/s6e3TSCa6r0CceXSabQFincq06prdIbiTC2xyRUtIURB8dpNWHWNWDItNXeGqcJl4UOLp/L/Xmng/pf2s7S+VD7DIQjFUmw+5Otd3tTs71vJ3KKrzJ/iYeFULwvqvEwvs/dZMv1mb85Idu35s/BHU3hsOr5IsqgvSor8GVFg8dhjj3HzzTezdOlSPvaxj/GhD32I8vLyXPdNHIfTovOxM6czq9LB9b/f2G+b+9fs59rzZ9EZShTtdKbjcBXuSCKNwzLimo5CCJFzTotOicNMVyghJ8UjcNmiKfzzjTbag3EeW9fExw9njBJHJVIZtrcGDs9K+NjbEeLNh3NVgblVLhbUeVk41cu8ahcmbeh7fo5kJOsMJdBVhY5gvGjPF0T+jegsbdOmTWzbto2HHnqIH/7wh3zpS1/ioosu4sorr+Syyy7DbpcKjeNFVxUC0eSgWR26wgnC8SRWU3GelFtNGt3hOIFYUgILIURBURSFSreFFn8UwzBkVnWYLLrGp86p57a/7+CP65u48MRKajy2fHdrTAw1uUo6Y7C/M5ydkWjy8UZzgMRb6qXUldpZONXDgjov82s9OTk2pidI0heRXyP+TTz55JO57bbbuO2223j
ppZd4+OGH+dKXvsTnPvc5AoFALvsoBpHKGHhspuNkdTDxHw9vYH6th8sWTaG+3JGHno6Orqp0hxIT9oAjhCheXpsZm1kjmkxjN8vFj+E6c2YZC+u8bGz08asX93Pju0/Kd5dyyqKreB2mAZOrGIZBayDWu7Rpc5Of4Fvqo5Q6zL1LmxZM9UiyAFGwcjICOhwObDYbZrO5N1uUGB/pjIEvOnBWh0+cNYMNDT7ag3Ge2dnOMzvbWVTn5bJFU1hU5y2aq2sOi053JCHrmIUQBcdm1ih3WGjxxySwGAFFUfj0uTP54u/W88r+btY39HDatJJ8dysnLLrK1FIbq57fe0xylU+dW88P/rGTp7a20v6WlK42k8apUz2HN117ZY+hKBojHgH379/Pww8/zMMPP8zOnTtZvnw5t9xyC5dffnku+yeGwBdOHjerw48+uIDHNxxizd5ONjT62NDoY0aZnfctmsK5cyqGtR4zH2xmDb8/QSCWlMBCCFFwKlwWGnsiZAxjwI2yYmDTSu2853BK9fv+vY+ff3gReoEfl4bC6zCx6vm9/Gz1sclVMobBObPL+e3ag+iqwrxqFwsP75OYU+WSiteiKCmGYQx7Qd2ZZ57Ja6+9xqmnnsqVV17JRz7yEaZMmTIW/cu5QCCAx+PB7/fjdruH/fhMxuCVfd0YGLishZP+1KKreO0mvHZTn6wOb61j0RqI8ZdNzfzzjdbe3NalDjOXnlrLO+dX4yzgPQytgSgzyhzMqXLluytCiGEY7bg71s+XC/FUmtcO9KCh4LQW7jhayELxFJ998HUCsRSfPKeeyxYWx3nFQDRVYXalgyW3/mvApcqvfv1CHl9/iNmVTrloJsZMOmPQGYpzen3piFL3D2fMHdHlgAsuuIAtW7awYcMGvvzlL1NbWzuSp+l19913M2PGDKxWK0uXLuXVV18d0uMeeeQRFEXhsssuG9XrTwRHsjrsaQ/TE06wpz1MWyB+TKq4areVT587k/uvPoOrlk2n1G6mO5zgN2sP8J8PvMZ9/95HWyA2wKvkl82UrQCaSkv6OyFEYbHoGpVOC8F48viNRb+cFp2rls0A4HevNuCLJPLboVHSVYWeyODJVYKxFIunl0hQISaMEQUWt956K2vXrmX+/PlYrVasVivz58/nl7/85bCf69FHH2XlypXcfPPNrF+/ngULFnDxxRfT3t4+6OMOHDjAV77yFc4999yRvIUJK50xiKcyx83s4LTqfHBxHb+8eglfumAO00vtRJNpntjUzGcefJ3v/2MHu9sKa7+Mw6wRSaQJxvofpIUQIp9KnWYUBcmsMwoXnljFrAoHkUSa3758MN/dGbFkOsMjrzXgsuq4bf3PYGWTq+ik5PdFTCAjCixuuukmrrvuOi699FIee+wxHnvsMS699FKuv/56brrppmE9149//GM+/elPs2LFCk466SRWrVqF3W7n17/+9YCPSafTXHnlldxyyy3MnDlz0OePx+MEAoE+X+Iok6ZywYlV/Pwji7jl0pNZWOclY8C/d3ey8rFNfO2Pm3l1fxeZ4a+YyzldU0llMvijxX0VSwgxPMUyjntsJlwWE6G4XPwYKU1V+MzbZgHwrzfa2FVgF7iGYldbkC89upFfv3SAl/Z08onDszBvteKsenyRpASiYkIZUWBxzz33cN9993H77bfz3ve+l/e+973cfvvt3HvvvfziF78Y8vMkEgnWrVvHhRdeeLRDqsqFF17I2rVrB3zct7/9bSorK/nkJz953Ne4/fbb8Xg8vV91dXVD7t9koigKp00v4Tv/MZ+ffXgh58+rQFMVtjUH+M6T27nmofX8Y1sriTxX4bSbdDpCCTIyEAsxaRTLOG7SVCpcFsIJWQ41GifVuDlvXgUGcO8L+wriwtZQxFNpHlizn//+wyYauiN4bCa6Qgk+d94srrtgTu/Mhdumc90Fc/js8pn4IvK7IiaWEe0wSyaTLFmy5JjbFy9eTCo19Cs1nZ2dpNNpqqq
q+txeVVXFjh07+n3Miy++yK9+9Ss2btw4pNe44YYbWLlyZe/3gUCgYA9KhaK+3MnKi+Zx1bIZ/HVzM3/f2sohX5S7nt3Dgy8f5N2n1HDJKTUj2gA0Wg6Ljj+WIJRI4S6gzfNCiLFTTON4qcOM1qmSTGcKPtteIfvEshm8vK+LnW1BntvZwdtPqMx3lwa1vSXAnat3c8gXBWD53Ao+fe5MPDYTTd1Rrlw6jWvPn9UnuUpTd/SYfZBCFLsRBRYf//jHueeee/jxj3/c5/Z7772XK6+8Micd608wGOTjH/849913H+Xl5UN6jMViwWKRQjIjUe608Imz6vnQkjr++UYbT2xqpiMY5+FXbEEv1wAAOoxJREFUG/jD+iYuOKGS/1gwhSkl41e0zqyrJFMGgWhSAgshJoliGsfdhwuWhmIpShzmfHenaJU5LXxoSR2/XXuQB9bs58yZpQVZIySWTPPgywf5y6ZmDKDUbuaa82extL6st82R5CqdoQS6qtARjMvyJzFhjfiv9Fe/+hX//Oc/OfPMMwF45ZVXaGho4KqrrupzZemtwceblZeXo2kabW1tfW5va2ujurr6mPZ79+7lwIEDXHrppb23ZTLZaF/XdXbu3MmsWbNG+pbEAOxmncsWTuHSU2t5aU8nj284xJ6OEH/f2spTW1tZOrOU9y2ayonVrnEp4GPWVbpCCaaW2Mf8tYQQYjg0VaHKbWV7S4ASJLAYjcsWTuHpN9po8cf4/euNfOKs+nx3qY8tTT5+/uweWvzZTIoXnljJJ8+eOWC64XTGkIBCTHgjCiy2bt3KaaedBmRP9iEbJJSXl7N169bedsc7yTSbzSxevJjVq1f3pozNZDKsXr2aL3zhC8e0P+GEE9iyZUuf22688UaCwSB33nlnwU6NTxSaqvC2uRWcO6ecrc0BHt/QxGsHenh5Xzcv7+tmXpWL9y2awpkzywYs7KOpCrqqkBrFAOsw6/giCSKJVEFewRJCTG4lDjNmXSWeSmPRJY3oSJk0lU+dM5PvPPkGf97YzEUnVo/rDPlAIokUv1l7kL9taQGys/tfOH82i6dPjGrhQozGiM7Knn322Zx1YOXKlVx99dUsWbKEM844g5/+9KeEw2FWrFgBwFVXXcWUKVO4/fbbe9PavpnX6wU45nYxdhRF4ZQpHk6Z4qGxO8KfNh7i2Z3t7GwLcsdTO6h2W3nvglouPLEKmzl7ULXoKl6HCa/NhD+axGMz9VvAbyisJpWeaIZgTAILIUThcVl0vHYTgUgKi1MCi9E4fUYJi6eXsO5gD798cR83X3pyXvuzvqGHu57dQ0cwDsA7T65mxdkz5Fg0DrrDCSLJFDZdw2s3S2XyIUqmM/SEE+iawjgsKhn5UqhcueKKK+jo6OCmm26itbWVhQsX8tRTT/Vu6G5oaEBVZQNcoaortfPFt8/hY2dO58ktLfxtcwutgRj3/nsfD7/awLvmV/OhJXXMqnSw6vm9PLDmAIFoCrdNZ8VZ9Xx2+cxhb2BTFAVNUegKx6lyW8fw3QkhxPApikKV20Z7wJ/vrhQ9RVH41Dn1bGz08frBHl470M3pM0rHvR+heIpfv7Sfp9/ILt2uclv44tvnsGCqd9z7MhnFkmmS6Qxzq1x0hxO0BWPoioLXnp0dFMeKJtL4YwkUFMqdZmq9NpzjEAArhlEkedxyZDhlyfuTyRi8sq8bAwOXbB4+RiyZ5pkd7fxp46Hedaf3XbWYLU1+fvbMnmPaX3fBHK5cOo22QHxYrxOKpUhjcPqMEllqIESBG+24O9bPNxbC8RSvH+jGZtJ7Z27FyP36pf08vuEQtR4rd330tHHNuPXagW7ufnYPXeFsDaX3nFrDVWfOkJ/rOMkYBi3+KPXlTuZWOUlnDLojCVp8MTqCcQwMPFaz/DwAwzAIx7MBhVXXqHRbqPbY8NpMqKOY4RnOmJv3GQsxsVhNGpecUsPFJ1fz6v4untnRztmzy/nyY5v6bX//mv1ce/4sOkO
JYe25sJk12oMxgjFZaiCEKDwOi06Jw0xHMC4nPDnw4dPreHZnO83+GH/Z1Mz7T5s65q8ZjCW599/7eG5nBwC1Hiv/dcEcTq71jPlri6O6wwm8djPTy+woioKuKVS6rFQ4LfREkrT6o7QH4nRH4nisZhwWbVwSyRSSjJHNlhlOpHBYdGZXOql0W/OSPVMCCzEmNFVh2axyzptXSTCWIhDtv75JIJrCH02hq8qwAgtNVTAAXyRBubM40lAKISaXSpeVFn8MwzAm3YlOrtnNOlcvm8Gdq3fzyGuNnDevktIxTOe7Zm8n9zy/F18kiarAfyycwkfPmIbVJEHieIom0qQzBjMrHMd89oqiUOowU+owM7U0SXsgRos/RrM/icOs4bKaJvw+jFQ6gz+aJJZK47GZOKnGTYXLmteLGRJYiDGVyhiU2LN53fsLLtw2HY9N790INxwOs05nKEF9uTHhBw8hRPHx2k3YzRrhRBqnRQ63o/X2Eyr5+9YWdrWF+M2aA1x/0dycv4Y/mmTV83t5cU8nAHUlNq67YC7zql05fy0xuIxh0B2JM7PcScVxLiC6rSbcVhNTvHY6gjGafTFaAzGsuorHZkKfYMUq46k0/kiStGFQ4jAz1+ui3GkpiP0m+e+BmNDSGQNfNMmKAfKPrzirHl8kOaLUs3azRiiWIhhLjrabQgiRc1aTRpnTTDje/4ytGB5VUfjs27K1qp7Z2c6O1kDOntswDF7Y1cE1D63jxT2dqAp8cPFU7vzwIgkq8qQrlKDUYWba4SVQQ2Eza0wrc3Da9BIW1HlwWnU6w3HaAjHiqfQY93jsRRIpWvxRfNEk5S4LC6d5OW1aCbVeW0EEFSAzFmIc+MJJPrt8JpDdU3EkK9Qnls3ozQo1EiZNJZUxCERTeO1SiEoIUXjKnRaauqOkMzKzmgtzq1xceGIl/9rezv++sI8ffXAB6iiXmXWHE9zz/B5e3tcNwIwyO9ddMJfZlc5cdFmMQCSRIoPBzArniJafmXWVGo+NSpeV7nCCFn+UjlCcVDqB12YqqvTAhmEQiqcIxFJYTSp1pTaq3Ta8dlNBLrEsnk9WFK14KkNTd5Qrl07j2vNn0RNJ4rLq/Ht3J6/s7abcNfI9EjZTdhN3XamtIP/AhBCTm9dmxmHRCcdTuG2SSTAXrjpzBi/t6WJPe4jV29u46KTqET2PYRg8u7Od+/69n1A8u9fvQ0vquHzx1HHNOiX6yhgGPZEEsyuco95DqakKFS4L5U4z/miSVn+MtkCMnnACl9WEy6oX7LlDOmMQiCWJJNI4LRpzKp1Uui0Fn5FUAgsxLuKpDG2BOJ2hBLqqcNOft/KPbW2cMsXDrZfNH/Eftt2sEYwlCcVTBf/HJoSYfMy6SqXLwr7OsAQWOVLiMPORM+r49UsH+M3agyybVT7sPSydoTh3P7uH1w/2ADC7wsl1F8xhRrljLLoshqEzFKfMaaGuzJ6z51QO17zw2s1MLbXTHojR7I/R7I9iN+m4bYWz0Tt5eEN2Ip3BYzNRX+6gwmUpmsQBEpKLcZXOGMRTGT64uA6zprLlkL93k9xIWE0aibRBMCZrmIUQhanMaUFFIZUeeiFQMbj3nFrLFK8NfzTJI682DPlxhmHwj22tXPvwel4/2IOuKly1bDo//OACCSoKQCSRQlFgZrljzGpUOS06MyucLJlewvwpHswmldZAlI5gnGQe/0ZjyTStgRhd4QRum86CqV4WTy+hrtReNEEFSGAh8qTKbeXyxdk85L9+aT+x5Mg3VemqMqKsUkIIMR7cVh2XTSckm7hzxqSpfPrc7N69v25pobE7ctzHtAVifPPPW7nr2T1EEmnmVbn42YcX8cHFdQVztXoyS2cMeiJJppXaKRuHNPJWk8bUEjuLp5ewsK4Ej12nK5ygLRAb1TnJcIXjKZp9UYLxJFVuC4umeVlYV0K1x1qUS/KKr8diwnj/aVOodFnoDCX4w7qmET+P06Jn8ziP40AghBBDpWs
q1W4LkYSMUbm0eHoJZ8woJZ0xuO/f+zCM/rMLZgyDJzc384XfrWdTkx+zpvLJs+v53gdOpa40d8ttxOh0heKUO81MKx3fmSOTplLtsbKoroRF07xUe6wE40mafdExy+hmGAb+aJJDvgjxdJoZ5XYWTyvllCkeyp2Wog50JbAQeWPRNT55TjYN7R83NNHqj43oeWwmjWgyTSAqaWeFEIWpxG5B05S8LrWYiD55Tj26qrCh0ccr+7vRVAWLrvaemDX7onz98S2semEfsWSGk2vd/Pwji7hs0ZSiPnmbaMLxFKoKMyuceUubqqoK5U4LJ9e6WTytlPpyB/F0mkO+CP5okswAgetwpDMG3eEEzf4oKHBCtYsl00uZV+3GU6BZnoZLAguRV8tmlrFgqodk2uCXL+4b0XMoioKmKHSHEznunRBC5IbLquO1mWQ/WI7Vem28b9EUZlU4mVvlZFalgxKHidmVDhQFfrp6F9uaA1hNKp9720xue98p1Hpt+e62eJNsvasE00odY1pNfagURcFjNzH38En/CdUuFAVa/FG6w4kR1d1KpjN0BOO0BaJYTCrzp3hYMr2EGeVOHBOseObEejei6CiKwmfeNov/emQDr+zvZv3BHk6bXjLs57GbNbrCCRKpTMEUiRFCiCNUVaHKbWVbix/I/8nTRPLxM6dzwyUncv9L+/niIxt6ayVdvWwG/++TS7n5iW1ctnAKVW5rvrsq+tEVjlPhshTksjSHRcdhcVLz/9u78+go63t/4O9nmWfWzEwmIRtkAxTZV6HBtmCLUsV762m99dpepdQq16XqobVgD4IeTy9q3XqUK1Ur2FutaL0uVYtHEeoC/rQCCl5F2ddsJJl9fZ7v74+BQGRmkpBlJpn365wcT575PJNPHsbP5DPf5XFb0RyI4XBrCI3+CBRZQqFN63QNRCSuw3t8NkWRQ0OF24kiuzbo7gR+KjYWlHVVHhsuGV+Olz85gsfe3YOHh03u9oIlm6aiKRCBPxLvl0VfRETd5baZYFEUROL6gNrlJdeVuS1Ys2kvHn57V/sxXziBh9/eBUkCll0yBg0+bvCRiwLRBBRJwvDi7E2B6gqzqmCo24rSAnNyKlNbGM2BGAQEXBYNVu3k/89CCARjOnyRGLTj6zfKXRYU2jTIeTD9Lnf/FSmvXDG9Cm6rCYfbwvjbJ0e6fb4iSzAMcJ0FEeUsh1lFoV3jdKhepMgS3FYT1mzal/LxNZv2wW3LnXsU0Em6IeALx1BdZENhDkyB6gpVkVHitGDCMDemVBdiqNuKUCyBw20hBCIJeMNxHPGGEdcNDC92YEq1B+OGupJbTufJa5CNBeUEuzk5bA0Az3508IzWS9g0BQ3+6BnNfyQi6muSlLwLcFzX0+5gRN2jyhK84Th84dTNmi+cgDecvKs25ZbmQBRDCiwYloNToDojyxI8dg1jKlyYUlOIkSUOJIQBWQJGlzsxraYQZ5UWwJWHN8VkY0E54zujS3B2qQPhuI6n0nz6lIndrCIYSyDATwOJKEe5bSZYtOROdtRzCUPAZTXBaU09s9tpVeGyqkjwA6ecEogkoCoShg+xD8h7NZzKaTFhZEkBzq3xYFqNB9VFdti0/F1pMLD/NWlQkSUJC789AgDw9s5GfH7U163zTYqMeMKAL8LpUESUm2yaiiK7mdOheklyR6E4FsysTfn4gpm1aAvFOZKdQ3RDwBuJoabIBrdtYEyB6gqLScnpdSL9hVeAcsrZpQW4YHQpAOCxd/Z0+83AoipoCkQ4zYCIctaQAjN0IXplX3wC2oJxLJw1HDd/96z2kQunVcXN3z0LC2cNR1uIHzblkqZABGVOC4YVDrwpUNS5/B2roZx1ZV013t/djF1NAbz1eQPmji3r8rl2swpfOIFQTB90e0MT0eDgtplg1xSEojocFtapnoomDBxqCeMnM6pww/kj4A0n4LKqaAvFcagljGiCNyXMFf5IHJoio7bYMai3XM1n/FelnFNo0/D
j6VUAgD9t3tetNRMWk4JoXOd0KCLKWWZVwZACMwIx1qneEk0YaPBFsasxiNZgDLsag2jwRdlU5JCEnpyqXFNsh8uWf4ua8wUbC8pJ88aXo9Jjgy+SwNMf7u/Wuaos41iAd+EmotxV5DBDCHDufy/TDYFowuB1zUFNgSjKXVYM5Z3PBzU2FpSTVEXGwm8NBwC8vv0o9jUHu3yu3ayiNRRDhLuuEFGOcllNcJhVBKJcxE2Dny8ch9kko6bYzilQgxz/dSlnTax0Y+aIIhgCeOzdPV1ekG3VFISiCU6HIqKcZVJklDotCLKxoEEurhvwRxMYXmzPy/s65Bs2FpTTfnZeLTRFxvbDXry3q7lL58iSBFmW4OVOIESUwzx2DYosIa4PnHUAuiE4GkzdkpwCZUGFm7tA5QM2FpTTSp0WXDZ1GADgyff3dfkNzWpS0eSPIjGA3rCJKL84j9/YLdenQxlCwB+J46g3jKZABP5oHM2BaLbTogHAG47DalJQO8QOhXc/zwtsLCjn/WDKUJQUmNEciOKvHx/q0jkOs4pQTOdNqIgoZymyhFKnBaFY7tUpIQSC0QTqfRHUe8MwANQU2TG5qhDjh7qhqTIafBHei4PSiusGgrEEhg+xw2nhFKh8wcaCcp5ZVXD1N5N3Vf3frYdQ7410eo4iS9CFAW+Yu0MRUe5y2zRoqoxoIjemF4VjOhr9ERz1hRHTDQx1WzC5qhDn1hTi7LICFDvMGFJgxrihLrhsJhz1hrkDE6XU5E9OgSp3cReofJITjcXKlStRU1MDi8WCGTNm4MMPP0wb+/jjj+Nb3/oWCgsLUVhYiDlz5mSMp8GhbngRJlW6EdcFnnhvT5fOsaoqmgIxGHzTI6Ic5bSocFlN3bpfT2+LJQwcC0RxuC2MUDyBYocZE4a5cW6NB2MqXChxWmBWlQ7nuKwmjK1wosxpQb0vPKDWiVDf84bjsJkV1BZzClS+yXpjsXbtWixatAjLly/Hli1bMHHiRMydOxeNjY0p4zdu3IgrrrgCGzZswObNm1FZWYkLL7wQhw8f7ufMqT9JkoRrvjUciizh/+1twZb9rZ2eYzer8EfiCOTgNAMiIiBZ20qdFkT6+UZucd1AayiGw60heCMxFFhVjBvqxLQaDyYMc6HcZYVVUzI+h01TMbrCiSqPDY3+CBd1E4DjU6CicQwf4kABp0Dlnaw3Fg888ACuueYaLFiwAGPGjMGqVatgs9nw5JNPpox/+umncf3112PSpEk455xz8MQTT8AwDKxfv76fM6f+VuWx4ZLx5QCS28929gmZpsqIJQz4wtwdiohyV6FNg8UkIxzr2z/MdUPAG47jSFsYLcEYLCYZoyucmFrtwZSqQgwrtMFhViFJXf+E2awqOLu0ACOGONAaiuX8QnTqe02BCCrcNpQ7LdlOhbIgq41FLBbDxx9/jDlz5rQfk2UZc+bMwebNm7v0HKFQCPF4HB6PJ+Xj0WgUPp+vwxcNXFdMr4LbasLhtjD+9smRTuPNqsK7cBMNcIO9jtvNKgptGvzR3v8QxBACgUgC9b4wGv0RyBIwssSBqdWFmFrtQXVR8t4C3Wkmvk5VZIwY4sCosgIEYwm0hVhz81VbKAa7pqJ2iB0yp0Dlpaw2Fs3NzdB1HaWlpR2Ol5aWor6+vkvPsXjxYlRUVHRoTk61YsUKuFyu9q/Kysoe503ZYzermF9XAwB49qODaAlmfgOzayraQrGc3HWFiLomH+p4qdOChGF0+UagmQghEIol0OCLoN4XgS4MVHlsmFJdiHNrPRhR4kDh8Xto9BZZllBdZMe4ChcMIdDkj/bK70IDRyxhIBzXMXyIAw6zmu10KEuyPhWqJ+6++248++yzePHFF2GxpB5yu+222+D1etu/Dh482M9ZUm/7zugSnF3qQDiu46lN+zLGWkwyIgkDvjAbC6KBKh/quNtmgtWU3Cb7TEXiOpr8URz1hhFJ6ChzWTC50o1zaz0YVeZEscMMk9K3b/t
lLgvGDXXBbJLR4Od2tPlCCHF8CpQVZZwCldey2lIWFxdDURQ0NDR0ON7Q0ICysrKM59533324++678dZbb2HChAlp48xmM8xmc6/kS7lBliQs/PYI/PL5T/D2zkZ8b1wZRpc7U8ZKkgRFktASiqLMxWJHNBDlQx23mBQUF2g43BqBvRuf9sYSBvyROCIJHRZVQZFDQ4nTDLdV63TxdV8pcpgxfpiMnfV+HPWGUea0cmegQa4tHEeBxYTaYk6ByndZHbHQNA1Tp07tsPD6xELsurq6tOfde++9uOuuu7Bu3TpMmzatP1KlHHN2aQEuGJ2cQvfYO3sy7qNu11S0BGI5s088EVEqxQ4zhBCdfsqfOLGjU1sYbZEYHBYVYytcmFbb9R2d+prTYsK4Che3o80DsYSBSFzH8CH2bjXFNDhl/RWwaNEizJ8/H9OmTcP06dPx0EMPIRgMYsGCBQCAq666CkOHDsWKFSsAAPfccw+WLVuGZ555BjU1Ne1rMRwOBxwOR9Z+D+p/V9ZV4/3dzdjVFMBbnzdg7tjUo1xWTUGjPw5/JAGzI7tvtkRE6bitGuxmFcFo4rRtOnVDIBBNIBhNQJElOCwqqjxWFNrMKLCoOfkpsVVTMLrCCU2VcbA1BI/NDIuJNXgwEUKgMRBBtceG0gLOCqAcaCwuv/xyNDU1YdmyZaivr8ekSZOwbt269gXdBw4cgCyfHFh59NFHEYvFcNlll3V4nuXLl+OOO+7o83wlCTCbZDQFogjFdDjMKmya0qMdNejMFNo0/Hh6FZ54by/+tHkfzhtRDIfl9Je0IksQSO5WUewY3NMpiGjg0lQZJQVm7GkOosBigiEEQjEdgWgcQgAOs4oRJXZ47Ga4rKYBMb3oxHa0JkXG3uYgCsymlHWaBqbWUBwuiwk1nAJFx0kiz7Zt8Pl8cLlc8Hq9cDpTz8vvTDShwxuKo8kfxbFgDOF4Aqokw2FRYTWxyehPCd3ATWu34WBLCJdMKMfCb49IGecNx6EqEs6t8QyIN2OiwaQ36m5fPl8uORaIYsuBNkgAdCFg1xQUO8wocpjhtpn6fPF1XzEMgUOtIXzZGIBZkeG2adlOiXoomtDRGoph/FA31zAOct2pufzY4AyYVQUlTgUlTgsicR1toTga/RG0BmNoC8VgUhQUWFQO+fYDVZGx8FvDsfTlHXh9+1HMHVOGmmL7aXE2TUFLMAZ/JM43NCLKWS6rCcUODaoioaTAApfVNCjeS2RZQlWRHZqqYGe9D03+KIodGj+IG6CEEGgOxFBdZEWpkzMB6KSB+dFHDrGYFJS5LJgwLLml3/hhbhQ5NARjCRxuC+NYIIpInIuG+9LESjdmjiiCIZJ35E41CGdSZCQMwW1niSinqYqMicPcGD/UjVKnZVA0FafidrSDQ0swBrfVhJoiB5tD6oCNRS+yaSoq3FZMGObCtBoPxg9zwWUzIRCN47A3hGOBKGIJ7ozRF352Xi00Rcb2w168t6s5ZYzVpKDRH+FNm4gopw32uerJ7WhdcNs0HPWGM+7qR7knEtcRNwRqh9izvvsY5R42Fn1AkiQ4zCqGuq2YVOnGtBoPxpa74LCo8EZiONwWQmswxu33elGp04LLpg4DADz5/r6Uo0Q2TYE/EkcgylELIqJsOrEdbbnLyu1oBxAhBI4Fo6gstKKkgFOg6HRsLPqYJEkosJhQ6bFhSlUhplZ7MKbcCatZQUsohiNtYbSGYkiwqPbYD6YMRUmBGc2BKP768aHTHreYFMR0AX+EjQURUbZZNQWjy52o8tjQFIgg3IO7jlP/aAnG4LZpqCm2cwoUpcTGoh/JsgSX1YSqIjumVhXi3GoPRpU5oKkyjgVjOOINwRuOc1j4DJlVBVd/sxYA8L9bD6HeGzktxiTLaPJH+zs1IiJKQVNljCpzYkSxA63hGAL84CdnReI6EobA8GL7oFv7Q72HjUWWyLIEl82EmmIHzq3xYGp1IUYOKYAqS2gKRHHEG4aPTUa
31Q0vwqRKN+K6wBPv7TntcbtZgTcc5ydjREQ5QpEljChxYHRZAYLxBFpDsWynRF9jnJgC5bFhCKdAUQZsLHKAIksotGsYUeLAtJpCTKlyY8QQOyQJaPRHUO8Lwx+Jc/eMLpAkCdd8azgUWcL/29uCLftbOzxuNSkIx3X4I/EsZUhERF8nScntaMdVuCCEQJM/yo02csiJKVDVRTZOgaKM2FjkGFWRUeQwY2RJAc6t9WBKdSGqPXYYAOq9YTT4IghEE2wyMqjy2HDJ+HIAye1nT10UKEkSZEhoCfITMSKiXHNiO1oLt6PNGeGYDt0QGDHEwSlQ1Ck2FjnMpMgodphxdlkBzq0pxOTqQlQUWpAwDNT7ImjwRRCKJfipTgpXTK+C22rC4bYw/vbJkQ6P2c0KjgVj3PqXiCgHFTnMGMftaHOCIQRaQlFUeWwodvDmstQ5NhYDhFlVUFJgwZhyF86t8WBSpRvlLguiCQNHfWE0+tlknMpuVjG/rgYA8OxHBzuMUNg0FaFYgtOhiIhy1Ne3o+UHQdlxLBCDx66hilOgqIvYWAxAFpOCUqcFY4cmm4yJwwoxpMCMSFzHUW8YTf4ogpwuhe+MLsHZpQ6E4zqe2rSv/bgiSzAMoC3ExoKIKFeduh1tc5Db0fa3UCwBAwLDOQWKuoGNxQBn1RSUuSyYMMyNc2s9GD/MjSKHhrhhoNEfwRFvCC3BGMIxPe9GM2RJwsJvjwAAvL2zEV8c9bU/ZtMUNAWiHGInIsph7dvRDnGgjdvR9hvdEGgJxVDjsaHYwV2gqOvYWAwiNk1FhduKiZVuTK/1YEpVIUaVFsBhURCOJ3DEG8ZRbxhtoRiiifz45Ofs0gJcMLoUAPCHd/a0NxJ2s4pANME3KSKiHKfIEkYMceAcbkfbb44Foyh2mFFZZMt2KjTAqNlOgPqGWVVgdigocphRXSQQjusIRBPwheNoDsTgj8RxLBGFLMuwmRRYNQUmZXD2mVfWVeP93c3Y1RTAW583YO7YMpgUGQndgC8Sh8tmynaKRESUwYntaDVVwc56H5r8URQ7NM777wOhWAKSBAwvtsOscgoUdc/g/EuSOpAkCTZNRUmBBSNLCjC9xoNpNR5MrCxEZaEVkgS0hGI47A2hyR9FIJoYVFOECm0afjy9CgDwp8372kcpLKqCpkAk76aIERENVNyOtm/phkBrKI4qjw1FnAJFZ4CNRR6SZQkFFhPKXBacU+7E9FoPzq32YPxQN4Y4NeiGgSZ/FEfawjgWiA6K3abmjS9HpccGXySBpz/cDyA5HcoXTiDEBYFERAMGt6PtO8cCyZGgKo8926nQAMXGgqAqMlw2E4a6rRg/NLkIfGpNIUaXO+GymRBJJHebOuINozUUQyQ+8P4QVxUZC781HADw+vaj2NcchMWkIBrX4eO2s0REAwq3o+25hG4gGE3gWCB6/D0+BM0kY/gQBzSVfx7SmeEaCzqNWVVgVpX2vavDsZPrM44Fk1vZtoSikCHDqimwDZD1GRMr3Zg5ogibdh/DY+/uwW8vHQdVlnEsEEO5y5rt9IiIqBtObEdrUiQcbA2h0GqGVeOagFR0QyCa0BGNG4gc37xFliRYTDKcNhVuqwa7WYVNU1Bg4bpDOnNsLKhTVi25uHtIgRnDhR3BmI5gNAFvKI5jgRhawzEkEgKqnFzLYdUUKHJuLqj72Xm1+Oe+Vmw/7MX7u49hcqW7fRSG+3QTEQ0sJ7aj1VQZe5qC0A0THJb8/tPGEALRuIFoQkckYcAwBBRZgqbKsJkVVBRakk2EKfl+zdEJ6k35+39fMAgoKf6QVBTAYukYl44sA1brmcWGQkC6dQuSBNhsZxYbDgNGhiFhu/3MYiMRQNchAXAAcChAaYEM3W5GIKogqJrRGoyhNRxHyzEv9LgOsyrBqimwmJQOO3cIqy2ZNwApGgX09Fu+divWYk1eZwBSLAYkTp/iVKYCV4z14E+fNOKP7+3Fyh9PRjgQgu+YDEuB5bR
4AMnXw4nXSiwGxDNMnTo1Nh5PxqdjNgOq2v3YRAKIRtPHahpgMnU/VteT/87pmEzJ+O7GGkbytdYbsaqavBZA8v+JUKh3Yrvz/z1rROrY4zUipUy/d0+wjvfev1F3Y20nazOi0WSt6Y1Y68k63mm9PR6ryBJGuDSYI2F81eRFpC3ZcKiKDFWWYFJkSFZr+2sl3fvDCcLcsY5L8fS1WWgd63iXYxMJSLH0tVmYOtbxdLFCCEQlFREpObU3Ho/DFI9BU6TkzXRtJhSYTbAc3/3RYrN8rY6HgXRpsI4nsUZ0r4aLPOP1egUA4U3+853+dfHFHU+w2VLHAULMmtUxtrg4fey0aR1jq6vTx44Z0zF2zJj0sdXVHWOnTUsfW1zcMXbWrPSxNlvH2IsvTh97yssoGtdF9NIfZIz99ItD4tODbeLTg22i5bIrMsZ+tm1Xe2zzVT/PGPv5pk/aYxsX/iJj7OW/eFxUL35V/Pr5T8Su63+Z+Xf78MOT1+HeezPHbthwMvaRRzLHvvrqydjVqzPHPvfcydjnnsscu3r1ydhXX80c+8gjJ2M3bMgce++9J2M//DBz7PLlJ2N37Mgc+6tfnYzduzdz7PXXn4xtbMwcO3/+ydhAIHPsZZd1fL1nimWNSH51o0Z4AQFAeL1e0RtYx0/RB3VcCJH8fyJTbCBwMnb+/MyxjY0nY6+/PnPs3r0nY3/1q8yxO3acjF2+PGPspr+8LtbtOCrW7Tgqvvzl7Rljdz/3t/b3kkN3/S5j7N41a9tjD9y/MmPsvkfXtMfue3RNxtgD969sj927Zm3G2J23rxCbdzeLz460icZX/p75mrGOJ79YI5JfXawR3anh+TtiQX1CU2Wgk2HVBm8EekKGSVZQomdnwd0PpgzFB18Y+N+th/DTOBf9ERENZueUOREe6kJcN2CxZl5D0BKIwR+IQpUl2LO4KDyuG4jGDYhw5g1GqotsGFnjgSxLQLqRd6J+IiWbuvzh8/ngcrngPXIETqfz9AAOj6WO7YUhdCEEQjEdIdWMSMKANxxDoC2EWDSKuC4gRPLyJxePy8kmxWbv1alQJxhmC5a9+jm2HWxDXWUBrpkxDJOqCuGxa6cHcypUEqdCdT+WNQLA8bpbUQGv15u67nYT6zinQnU7NkUdNwyBmG4grhuI6yL534SBiGpCSAfCMR2JaAx6JIKELpLb2kqADAmyDJhkGbLVAlXToCoSVD0BOdMUqwxToXTdQCRhIBrXEdMFDJMJikWDRVXgUACPImDW5OQ6RtPX1jGyjiexjp/UyzWiOzU8fxuLXnqDo57RDYFIXEc4riMc0+GPxOENJxBN6IglDAgAqiTBbDrZbMi9dKfVAy0h3PTsVuiGwE3fHYkfTB6KmmJHrzw3EZ3U23WXdZz6S/xE45E4tQkxkh+SxRKIxg3EDQMJQyCREJAkQEBAlmSYZKl9nYeqSFCPrwc5dYemqK4nF1crMiyqDLtZhdtqgt2swqIpsJkUqANg10Ua3LpTczkVirJKkSXYzSrs5pMvRcMQiCSSjUb4+H0mkjeyS6AtbEA3BFRZhlmVk6MbpjNrNqo8Nlwyvhwvf3IEz//zEL45oghVHntyOJmIiPKeSZGT26mnGMwGkHLUI5ZI7siUbD50JHQDkbhINh+GkXz/Mh3foclmgcNsgvX44mru0EQDHRsLyjny8W1rbVrHZiOaMBCKJRCO6whEEvCG4wjHk82GAQFFOtFsJBuOrmx5e8X0KhxoCeH680dgQmUhWkIxOC0mJAyjw8/PtnAsAUWW4Y/EUcD8uiWXcwOYH9FAJssSLLKSdrtyIZINxamjHqbjOzZxi3MajPjuQAOCLEvt99M4QQiBSNxITqOK6whE4sebDR2+cAKGEJAlKdlomFI3Gx67hj9dPR2PvbMHv3z+E/jCCTitKhbMrMX1s0fAnAOFPxrXseofe7B6017mN4hyY35Eg58kSTApUsZRD6LBJCcai5UrV+J3v/sd6uvrMXH
iRDz88MOYPn162vjnn38et99+O/bt24ezzjoL99xzDy6++OJ+zJhygSR9vdmwQojkyMaJaVTBaAJt4fjx9RsJ6EJAxslmo9Rpx+Pv7sHDb+9qf15fOIHfr/8KAHD1N2uTC/ayRJEl/PG9ve35AMxvMOQGDPz8Fs4azpELIiLqIOvvCmvXrsWiRYuwatUqzJgxAw899BDmzp2LnTt3oqSk5LT4TZs24YorrsCKFStwySWX4JlnnsGll16KLVu2YNy4cVn4DSiXSNLJIebCU45H4nr7IvFgNIG2UBwJw4DLZsKaTftSPtfqTXuxcNZwzL5vI1qCGXZs6iMeu4b3Fp+P1Zv2pnyc+Q3M3IDBkd8N54/s56yIiCjXZX2V0AMPPIBrrrkGCxYswJgxY7Bq1SrYbDY8+eSTKeN///vf43vf+x5uvfVWjB49GnfddRemTJmCRx55pJ8zp4HEYlLgtmkod1kxsqQA02o8mFJVCH8kAV849daHvnACLcEYhjjM/Zxt0hCHGccCMeZ3BnI5N2Bw5OePZN5bn4iI8k9WRyxisRg+/vhj3Hbbbe3HZFnGnDlzsHnz5pTnbN68GYsWLepwbO7cuXjppZdSxkejUURP2cff5/P1PHEaFDRVgdMiwWlVU/4B5bSqKCmwYO2130A2JqRIAGxmlfkNstyAwZFfgSXzTcZ6E+s4EdHAkNXGorm5Gbquo7S0tMPx0tJSfPHFFynPqa+vTxlfX1+fMn7FihW48847eydhGnR0w8CCmbUd5pGfsGBmLRKGAXeqm+b1k3AswfzOUC7nBgyO/LR+GvRmHSciGhiyPhWqr912223wer3tXwcPHsx2SpRDrJqK62ePwM3fPQtOa7LPdlpV3Pzds3D97BFZX5zK/AZnbsyve1jHiYgGhqy+cxUXF0NRFDQ0NHQ43tDQgLKyspTnlJWVdSvebDbDbM7OPGUaGMwmBQtnDccN54/ssFd/rmynyfwGZ24A8+tyHqzjREQDQlZHLDRNw9SpU7F+/fr2Y4ZhYP369airq0t5Tl1dXYd4AHjzzTfTxhN1hU1ToakyihxmaKqc9U+Lv475nblczg1gfkRENHhk/R1i0aJFmD9/PqZNm4bp06fjoYceQjAYxIIFCwAAV111FYYOHYoVK1YAAG6++WbMmjUL999/P+bNm4dnn30W//znP/HYY49l89cgIiIiIsprWW8sLr/8cjQ1NWHZsmWor6/HpEmTsG7duvYF2gcOHIAsnxxYmTlzJp555hksXboUv/nNb3DWWWfhpZde4j0siIiIiIiySBJCZO/Wrlng8/ngcrng9XrhdDqznQ4R0aDX23WXdZyIqP90p+ZmfcSiv53oo7gPOhFR/zhRb3vrcyzWcSKi/tOdGp53jYXf7wcAVFZWZjkTIqL84vf74XK5euV5ANZxIqL+1JUanndToQzDwJEjR1BQUABJkrp1rs/nQ2VlJQ4ePMjh96/htUmP1yY9XpvUBtt1EULA7/ejoqKiw5q5M8U63vt4XdLjtUmP1ya9wXRtulPD827EQpZlDBs2rEfP4XQ6B/yLpK/w2qTHa5Mer01qg+m69MZIxQms432H1yU9Xpv0eG3SGyzXpqs1fNDfeZuIiIiIiPoeGwsiIiIiIuoxNhbdYDabsXz5cpjN5mynknN4bdLjtUmP1yY1Xpe+w2ubGq9Lerw26fHapJev1ybvFm8TEREREVHv44gFERERERH1GBsLIiIiIiLqMTYWRERERETUY2wsiIiIiIiox9hYfM3KlStRU1MDi8WCGTNm4MMPP8wY//zzz+Occ86BxWLB+PHj8frrr/dTpv2vO9dmzZo1kCSpw5fFYunHbPvPO++8g3/5l39BRUUFJEnCSy+91Ok5GzduxJQpU2A2mzFy5EisWbOmz/Psb929Lhs3bjztNSNJEurr6/sn4X60YsUKnHvuuSgoKEBJSQkuvfRS7Ny5s9Pz8qne9ATreGqs4amxhqfHOp4aa3h6bCx
OsXbtWixatAjLly/Hli1bMHHiRMydOxeNjY0p4zdt2oQrrrgCV199NbZu3YpLL70Ul156KXbs2NHPmfe97l4bIHm3yaNHj7Z/7d+/vx8z7j/BYBATJ07EypUruxS/d+9ezJs3D+effz62bduGW265BT//+c/xxhtv9HGm/au71+WEnTt3dnjdlJSU9FGG2fOPf/wDN9xwAz744AO8+eabiMfjuPDCCxEMBtOek0/1pidYx1NjDU+PNTw91vHUWMMzENRu+vTp4oYbbmj/Xtd1UVFRIVasWJEy/kc/+pGYN29eh2MzZswQCxcu7NM8s6G712b16tXC5XL1U3a5A4B48cUXM8b8+te/FmPHju1w7PLLLxdz587tw8yyqyvXZcOGDQKAaG1t7ZecckljY6MAIP7xj3+kjcmnetMTrOOpsYZ3DWt4eqzj6bGGn8QRi+NisRg+/vhjzJkzp/2YLMuYM2cONm/enPKczZs3d4gHgLlz56aNH6jO5NoAQCAQQHV1NSorK/H9738fn332WX+km/Py5XVzpiZNmoTy8nJccMEFeP/997OdTr/wer0AAI/HkzaGr5vOsY6nxhreu/LhNdNT+VbHWcNPYmNxXHNzM3RdR2lpaYfjpaWlaecG1tfXdyt+oDqTazNq1Cg8+eSTePnll/HnP/8ZhmFg5syZOHToUH+knNPSvW58Ph/C4XCWssq+8vJyrFq1Ci+88AJeeOEFVFZWYvbs2diyZUu2U+tThmHglltuwXnnnYdx48aljcuXetMTrOOpsYb3Ltbw9PKxjrOGd6RmOwEanOrq6lBXV9f+/cyZMzF69Gj84Q9/wF133ZXFzChXjRo1CqNGjWr/fubMmdi9ezcefPBB/M///E8WM+tbN9xwA3bs2IH33nsv26kQtWMNpzORj3WcNbwjjlgcV1xcDEVR0NDQ0OF4Q0MDysrKUp5TVlbWrfiB6kyuzdeZTCZMnjwZu3bt6osUB5R0rxun0wmr1ZqlrHLT9OnTB/Vr5sYbb8Srr76KDRs2YNiwYRlj86Xe9ATreGqs4b2LNbx7BnMdZw0/HRuL4zRNw9SpU7F+/fr2Y4ZhYP369R0+tTlVXV1dh3gAePPNN9PGD1Rncm2+Ttd1bN++HeXl5X2V5oCRL6+b3rBt27ZB+ZoRQuDGG2/Eiy++iLfffhu1tbWdnsPXTedYx1NjDe9d+fCa6U2DsY6zhmeQ7dXjueTZZ58VZrNZrFmzRvzf//2fuPbaa4Xb7Rb19fVCCCGuvPJKsWTJkvb4999/X6iqKu677z7x+eefi+XLlwuTySS2b9+erV+hz3T32tx5553ijTfeELt37xYff/yx+Pd//3dhsVjEZ599lq1foc/4/X6xdetWsXXrVgFAPPDAA2Lr1q1i//79QgghlixZIq688sr2+D179gibzSZuvfVW8fnnn4uVK1cKRVHEunXrsvUr9InuXpcHH3xQvPTSS+Krr74S27dvFzfffLOQZVm89dZb2foV+sx1110nXC6X2Lhxozh69Gj7VygUao/J53rTE6zjqbGGp8canh7reGqs4emxsfiahx9+WFRVVQlN08T06dPFBx980P7YrFmzxPz58zvEP/fcc+Lss88WmqaJsWPHitdee62fM+4/3bk2t9xyS3tsaWmpuPjii8WWLVuykHXfO7G93te/TlyP+fPni1mzZp12zqRJk4SmaWL48OFi9erV/Z53X+vudbnnnnvEiBEjhMViER6PR8yePVu8/fbb2Um+j6W6LgA6vA7yvd70BOt4aqzhqbGGp8c6nhpreHqSEEL07ZgIERERERENdlxjQUREREREPcbGgoiIiIiIeoyNBRERERER9RgbCyIiIiIi6jE2FkRERERE1GNsLIiIiIiIqMfYWBARERERUY+xsSAiIiIioh5jY0GUY2KxGEaOHIlNmzZlO5Uu+cY3voEXXngh22kQEeUM1nHKV2wsiI5ramrCddddh6qqKpjNZpSVlWHu3Ll
4//3322NqamogSRI++OCDDufecsstmD17dvv3d9xxByRJgiRJUBQFlZWVuPbaa9HS0tJpHqtWrUJtbS1mzpzZfuy3v/0tZs6cCZvNBrfbnfK8AwcOYN68ebDZbCgpKcGtt96KRCLR6c8Lh8Ow2+3YtWtXp7GpLF26FEuWLIFhGGd0PhFRb2EdZx2n7GJjQXTcD3/4Q2zduhVPPfUUvvzyS7zyyiuYPXs2jh071iHOYrFg8eLFnT7f2LFjcfToURw4cACrV6/GunXrcN1112U8RwiBRx55BFdffXWH47FYDP/2b/+W9nxd1zFv3jzEYjFs2rQJTz31FNasWYNly5Z1muebb76J6upqjBw5stPYVC666CL4/X78/e9/P6PziYh6C+s46zhlmSAi0draKgCIjRs3Zoyrrq4WN910k9A0Tbz22mvtx2+++WYxa9as9u+XL18uJk6c2OHcRYsWicLCwozP/9FHHwlZloXP50v5+OrVq4XL5Trt+Ouvvy5kWRb19fXtxx599FHhdDpFNBrN+DN/9rOficWLF6d87KmnnhJ2u118+eWX7ceuu+46MWrUKBEMBtuPLViwQPzHf/xHxp9DRNSXWMdZxyn7OGJBBMDhcMDhcOCll15CNBrNGFtbW4v//M//xG233dblYeN9+/bhjTfegKZpGePeffddnH322SgoKOhy7gCwefNmjB8/HqWlpe3H5s6dC5/Ph88++yzteYZh4NVXX8X3v//9lI9fddVVuPjii/GTn/wEiUQCr732Gp544gk8/fTTsNls7XHTp0/Hu+++262ciYh6E+s46zhlHxsLIgCqqmLNmjV46qmn4Ha7cd555+E3v/kNPv3005TxS5cuxd69e/H000+nfc7t27fD4XDAarWitrYWn332WadD7/v370dFRUW386+vr+/wZgSg/fv6+vq0552YYzxjxoy0MX/4wx9w9OhR3HTTTbj66qtxxx13YOrUqR1iKioqcPDgQc7PJaKsYR1nHafsY2NBdNwPf/hDHDlyBK+88gq+973vYePGjZgyZQrWrFlzWuyQIUPwq1/9CsuWLUMsFkv5fKNGjcK2bdvw0UcfYfHixZg7dy5+8YtfZMwhHA7DYrH0xq/TJS+//DIuueQSyHL6UlBYWIg//vGPePTRRzFixAgsWbLktBir1QrDMDr9lJCIqC+xjqfGOk79hY0F0SksFgsuuOAC3H777di0aRN++tOfYvny5SljFy1ahHA4jP/+7/9O+bimaRg5ciTGjRuHu+++G4qi4M4778z484uLi9Ha2trtvMvKytDQ0NDh2Invy8rK0p73yiuv4F//9V87ff533nkHiqLg6NGjCAaDpz3e0tICu90Oq9XazcyJiHoX63hqrOPUH9hYEGUwZsyYlAUYSM7nvf322/Hb3/4Wfr+/0+daunQp7rvvPhw5ciRtzOTJk/HFF19ACNGtPOvq6rB9+3Y0Nja2H3vzzTfhdDoxZsyYlOd89dVX2L9/Py644IKMz71p0ybcc889+Nvf/gaHw4Ebb7zxtJgdO3Zg8uTJ3cqZiKg/sI6zjlP/YWNBBODYsWP4zne+gz//+c/49NNPsXfvXjz//PO499570y6IA4Brr70WLpcLzzzzTKc/o66uDhMmTMB//dd/pY05//zzEQgETluod+DAAWzbtg0HDhyAruvYtm0btm3bhkAgAAC48MILMWbMGFx55ZX45JNP8MYbb2Dp0qW44YYbYDabU/6sl19+GXPmzOmweO/r/H4/rrzyStx000246KKL8PTTT2Pt2rX461//2iHu3XffxYUXXtjpNSAi6ius46mxjlO/yva2VES5IBKJiCVLlogpU6YIl8slbDabGDVqlFi6dKkIhULtcdXV1eLBBx/scO4zzzwjAHS6TaEQQvzlL38RZrNZHDhwIG0uP/rRj8SSJUs6HJs/f74AcNrXhg0b2mP27dsnLrroImG1WkVxcbH45S9/KeLxeNqf881vflM8/vj
jaR8XIrn94Pjx40UkEmk/dv/99wuPxyMOHTokhBDi0KFDwmQyiYMHD2Z8LiKivsQ6nhrrOPUnSYhujtURUZ/69NNPccEFF2D37t1wOBx98jOam5tRXl6OQ4cOnbYLSXctXrwYra2teOyxx3opOyKigY11nPIVp0IR5ZgJEybgnnvuwd69e/vsZ7S0tOCBBx7o8ZsRAJSUlOCuu+7qhayIiAYH1nHKVxyxICIiIiKiHuOIBRERERER9RgbCyIiIiIi6jE2FkRERERE1GNsLIiIiIiIqMfYWBARERERUY+xsSAiIiIioh5jY0FERERERD3GxoKIiIiIiHqMjQUREREREfXY/wf+ROmj16eUCwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\"Linear ANCOVA model with Permutation Forest\")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "id": "74ab24c9-b9fe-4d67-9b04-7125aecb2816", + "metadata": {}, + "source": [ + "# Run Experiment on Correlated Logit Model" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 
1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 
0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n" + ] + } + ], + "source": [ + "pvalue_dict = defaultdict(list)\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "beta_space = np.hstack((np.linspace(0.01, 2.5, 8), np.linspace(5, 20, 7)))\n", + "for sigma_factor in j_space:\n", + " for idx in range(5):\n", + " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "\n", + " elements_dict = correlated_logit_model(sigma_factor, new_seed)\n", + " for key, value in elements_dict.items():\n", + " pvalue_dict[key].append(value)\n", + " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "\n", + "df = pd.DataFrame(pvalue_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, "outputs": [ @@ -378,7 +775,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 20, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, "outputs": [ @@ -403,10 +800,9 @@ " \n", " \n", " \n", - " X1\n", - " X6\n", " X2\n", - " X7\n", + " X1\n", + " X500\n", " sigma_factor\n", " \n", " \n", @@ -414,41 +810,36 @@ " \n", " 0\n", " 1.000000\n", - " 0.001996\n", - " 1.000000\n", + " 0.004975\n", " 1.000000\n", " 0.005\n", " \n", " \n", " 1\n", - " 0.001996\n", - " 0.001996\n", - " 0.001996\n", - " 0.001996\n", + " 0.004975\n", + " 0.004975\n", + " 1.000000\n", " 0.005\n", " \n", " \n", " 
2\n", " 1.000000\n", - " 1.000000\n", - " 1.000000\n", + " 0.004975\n", " 1.000000\n", " 0.005\n", " \n", " \n", " 3\n", + " 0.004975\n", " 1.000000\n", - " 1.000000\n", - " 1.000000\n", - " 1.000000\n", + " 0.004975\n", " 0.005\n", " \n", " \n", " 4\n", + " 0.004975\n", " 1.000000\n", - " 0.001996\n", - " 1.000000\n", - " 0.001996\n", + " 0.004975\n", " 0.005\n", " \n", " \n", @@ -456,12 +847,12 @@ "" ], "text/plain": [ - " X1 X6 X2 X7 sigma_factor\n", - "0 1.000000 0.001996 1.000000 1.000000 0.005\n", - "1 0.001996 0.001996 0.001996 0.001996 0.005\n", - "2 1.000000 1.000000 1.000000 1.000000 0.005\n", - "3 1.000000 1.000000 1.000000 1.000000 0.005\n", - "4 1.000000 0.001996 1.000000 0.001996 0.005" + " X2 X1 X500 sigma_factor\n", + "0 1.000000 0.004975 1.000000 0.005\n", + "1 0.004975 0.004975 1.000000 0.005\n", + "2 1.000000 0.004975 1.000000 0.005\n", + "3 0.004975 1.000000 0.004975 0.005\n", + "4 0.004975 1.000000 0.004975 0.005" ] }, "metadata": {}, @@ -474,79 +865,15 @@ }, { "cell_type": "code", - "execution_count": 28, - "id": "9e60fac2-3b20-493e-886a-892d572a28c6", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", - "axs = axs.flatten()\n", - "\n", - "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", - " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", - "\n", - " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", - " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", - " ax.legend()\n", - "fig.suptitle(\n", - " \"Linear ANCOVA model with Coleman Forest (Permutation per tree and sample dataset per tree)\"\n", - ")\n", - "fig.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "7c99ce8c-a32d-447b-9dd2-85c8d310239f", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", - "axs = axs.flatten()\n", - "\n", - "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", - " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", - "\n", - " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", - " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", - " ax.legend()\n", - "fig.suptitle(\"Linear ANCOVA model with Coleman Forest (Permutation per tree)\")\n", - "fig.tight_layout()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "67846a66-1817-46c8-9ccc-5281773c4f92", + "execution_count": 25, + "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -554,38 +881,30 @@ } ], "source": [ - "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", "\n", - "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X500\"]):\n", " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", "\n", " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", - " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (beta)\")\n", " ax.legend()\n", - "fig.suptitle(\"Linear ANCOVA model with Coleman Forest\")\n", + "fig.suptitle(\"Correlated Logit model with Coleman Forest (permute per tree)\")\n", "fig.tight_layout()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "0f772759-751d-440c-abcb-13f3ee6f7705", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "7cf30622-ffff-4d00-b474-0ac49fcfde4b", + "execution_count": 23, + "id": "34294429-04f3-4b12-baf3-fa6fdc11646f", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAxYAAAJQCAYAAAATyPJiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAADLsUlEQVR4nOzdd3hUZdoG8PtML8lMekIKSShKlRKkCQKKIijKqmtDBVbsYGF1V1xXQEXsoliw48eua10r1qWIAipd6S2QUNKT6X3e748hA0MKSWaSSbl/1zUXzJlnZt55c86Z88zbJCGEABERERERURhk0S4AERERERG1fUwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsqF1YsmQJJEnCwYMHo12UEJIkYe7cudEuRq3mzp0LSZKiXYyw5OTkYOrUqU16bnP+bRpTt9WxZWVlzVKWpmrN+y41nN/vR58+fTB//vxoF4XaOI/Hg6ysLLzyyivRLgq1YkwsqE779+/Hrbfeii5dukCj0cBgMOCcc87BCy+8AIfDEe3iRcx7772HhQsXRu39Dx48CEmS8Mwzz0StDNUef/xxfPbZZ9EuRrvU3HW7atUqXH755UhLS4NKpUJKSgomTpyI//73v832nm2dJEm13tLS0qJdtFp9/fXXjU72/vOf/6CwsBAzZsxonkK1Ijt27MDcuXNb3Q9MkbB27VrMnTsXVVVVUSuDUqnErFmzMH/+fDidzqiVg1o3JhZUq2XLlqFv37748MMPMXHiRCxatAgLFixA586dcf/99+Puu++OdhEjJtqJRbQ89NBDNRJEJhaR0dJ1O2fOHIwZMwbbtm3DrbfeisWLF+P++++H1WrFFVdcgffee69Z3rc9uOCCC7B06dKQ28svvxztYtXq66+/xrx58xr1nKeffhrXXHMNjEZjM5Wq9dixYwfmzZvXbhOLefPmRTWxAIBp06ahrKyM5xSqkyLaBaDWJz8/H9dccw2ys7OxYsUKdOrUKfjYnXfeiX379mHZsmVhv48QAk6nE1qttsZjTqcTKpUKMhlz3+aiUCigUPAU0Bxasm4//vhjPPLII7jyyivx3nvvQalUBh+7//778d1338Hj8bRIWdqiM844A9dff33EX9fr9cLv90OlUkX8tRtq8+bN2Lp1K5599tmovL/dbodOp4vKe7dFNpsNer0+7Nfx+/1wu93QaDQRKFWouLg4XHjhhViyZAn+8pe/RPz1qe3jVRvV8NRTT8FqteKtt94KSSqqdevWLaTFwuv14tFHH0XXrl2hVquRk5ODBx98EC6XK+R5OTk5uOSSS/Ddd99h0KBB0Gq1eO2117Bq1SpIkoT3338fDz30EDIyMqDT6WA2mwEAv/76Ky666CIYjUbodDqMGjUKa9asOe3n+Pzzz3HxxRcjPT0darUaXbt2xaOPPgqfzxeMGT16NJYtW4ZDhw4Fu0Hk5OQEH3e5XJgzZw66desGtVqNrKws/O1vf6vx2VwuF+69914kJycjNjYWl156KQ4fPtyg+m6okpIS3HTTTUhNTYVGo0G/fv3w7rvv1ogrLy/HDTfcAIPBgLi4OEyZMgVbt26FJElYsmRJMO7UcQCSJMFms+Hdd98N1kV94xeq/24ffvgh5s2bh4yMDMTGxuLKK6+EyWSCy+XCPffcg5SUFMTExGDatGk16q2h+44QAo899hgyMzOh0+kwZswYbN++vdZyVVVV4Z577kFWVhbUajW6deuGJ598En6/vwG1HPqeSUlJmDVrVnCb3+9HXFwc5HJ5yC+HTz75JBQKBaxWK4Cm1W1VVRWmTp2KuLg4GI1GTJs2DXa7/bTl/Oc//4mEhAS8/fbbIUlFtXHjxuGSSy4J3m/oflSbI0eO4C9/+QtSU1OhVqvRu3dvvP322yExkdgv3nnnHZx33nlISUmBWq1Gr16
98Oqrr9YoT/U55eeff8bgwYOh0WjQpUsX/N///V+DPk9DNKS+Tu7OuHDhwuD+vGPHDgDArl27cOWVVyIhIQEajQaDBg3CF198EfIaHo8H8+bNQ/fu3aHRaJCYmIgRI0bghx9+AABMnTo12JJycret+nz22WdQqVQ499xzQ7ZX75+7du3CVVddBYPBgMTERNx99921dnH517/+hby8PGi1WiQkJOCaa65BYWFhSMzo0aPRp08fbNy4Eeeeey50Oh0efPDBkLp5+eWX0aVLF+h0Olx44YUoLCyEEAKPPvooMjMzodVqcdlll6GioiLktesa73PyGKslS5bgz3/+MwBgzJgxwfpZtWpVMP6bb77ByJEjodfrERsbi4svvrjO88jJqsfwrV69GrfeeisSExNhMBhw4403orKyskZ8Q95n6tSpiImJwf79+zFhwgTExsZi8uTJtb7/3Llzcf/99wMAcnNzg5+tumVGkiTMmDED//73v9G7d2+o1Wp8++23ABp2zAIN/64DAq18P//8c42/ExHAFguqxZdffokuXbpg+PDhDYqfPn063n33XVx55ZX461//il9//RULFizAzp078emnn4bE7t69G9deey1uvfVW3HzzzTjzzDODjz366KNQqVS477774HK5oFKpsGLFCowfPx55eXmYM2cOZDJZ8KLjp59+wuDBg+ss15IlSxATE4NZs2YhJiYGK1aswMMPPwyz2Yynn34aAPCPf/wDJpMJhw8fxvPPPw8AiImJARC4iLz00kvx888/45ZbbkHPnj3xxx9/4Pnnn8eePXtCurVMnz4d//rXv3Dddddh+PDhWLFiBS6++OIG1V9DOBwOjB49Gvv27cOMGTOQm5uLjz76CFOnTkVVVVUw0fP7/Zg4cSJ+++033H777ejRowc+//xzTJky5bTvsXTpUkyfPh2DBw/GLbfcAgDo2rXraZ+3YMECaLVaPPDAA9i3bx8WLVoEpVIJmUyGyspKzJ07F7/88guWLFmC3NxcPPzww8HnNnTfefjhh/HYY49hwoQJmDBhAjZt2oQLL7wQbrc7pCx2ux2jRo3CkSNHcOutt6Jz585Yu3YtZs+ejWPHjjWqy5skSTjnnHOwevXq4Lbff/8dJpMJMpkMa9asCf6Nf/rpJwwYMCC47zSlbq+66irk5uZiwYIF2LRpE958802kpKTgySefrLOMe/fuxa5du/CXv/wFsbGxp/1MDd2PalNcXIyhQ4cGL2KSk5PxzTff4KabboLZbMY999wTEh/OfvHqq6+id+/euPTSS6FQKPDll1/ijjvugN/vx5133hnyPvv27cOVV16Jm266CVOmTMHbb7+NqVOnIi8vD7179z5tnTidzhoD52NjY6FWqxtdX++88w6cTiduueUWqNVqJCQkYPv27TjnnHOQkZGBBx54AHq9Hh9++CEmTZqETz75BH/6058ABC4eFyxYENxPzGYzNmzYgE2bNuGCCy7ArbfeiqNHj+KHH37A0qVLT/u5gED3mT59+tSacAKBfS4nJwcLFizAL7/8ghdffBGVlZUhidn8+fPxz3/+E1dddRWmT5+O0tJSLFq0COeeey42b96MuLi4YGx5eTnGjx+Pa665Btdffz1SU1ODj/373/+G2+3GzJkzUVFRgaeeegpXXXUVzjvvPKxatQp///vfg/vJfffdV+vFb33OPfdc3HXXXXjxxRfx4IMPomfPngAQ/Hfp0qWYMmUKxo0bhyeffBJ2ux2vvvoqRowYgc2bN4f8oFSXGTNmIC4uDnPnzsXu3bvx6quv4tChQ8FkurHv4/V6MW7cOIwYMQLPPPNMna07l19+Ofbs2YP//Oc/eP7555GUlAQASE5ODsasWLECH374IWbMmIGkpCTk5OQ0+JhtzHcdAOTl5UEIgbVr14b8aEEEABBEJzGZTAKAuOyyyxoUv2XLFgFATJ8+PWT7fffdJwCIFStWBLdlZ2cLAOLbb78NiV25cqUAILp06SL
sdntwu9/vF927dxfjxo0Tfr8/uN1ut4vc3FxxwQUXBLe98847AoDIz88PiTvVrbfeKnQ6nXA6ncFtF198scjOzq4Ru3TpUiGTycRPP/0Usn3x4sUCgFizZk1IHdxxxx0hcdddd50AIObMmVPjtU+Wn58vAIinn366zpiFCxcKAOJf//pXcJvb7RbDhg0TMTExwmw2CyGE+OSTTwQAsXDhwmCcz+cT5513ngAg3nnnneD2OXPmiFNPAXq9XkyZMqXe8lar/rv16dNHuN3u4PZrr71WSJIkxo8fHxI/bNiwkHpu6L5TUlIiVCqVuPjii0P2gwcffFAACCnvo48+KvR6vdizZ0/Iaz7wwANCLpeLgoKC4LaG/G2efvppIZfLg/X74osviuzsbDF48GDx97//XQgRqN+4uDhx7733Bp/XmLqtjv3LX/4Ssv1Pf/qTSExMrLd8n3/+uQAgnn/++XrjqjV0PxKiZv3cdNNNolOnTqKsrCzkNa+55hphNBqDx1u4+4UQtR+748aNE126dAnZVn1OWb16dXBbSUmJUKvV4q9//etpaiPwGWu7VR8nDa2v6mPYYDCIkpKSkPc4//zzRd++fUPOOX6/XwwfPlx07949uK1fv37i4osvrre8d955Z439qj6ZmZniiiuuqLG9ep+79NJLQ7bfcccdAoDYunWrEEKIgwcPCrlcLubPnx8S98cffwiFQhGyfdSoUQKAWLx4cUhsdd0kJyeLqqqq4PbZs2cLAKJfv37C4/EEt1977bVCpVKF1Fddx2p2dnbIMfXRRx8JAGLlypUhcRaLRcTFxYmbb745ZHtRUZEwGo01tp+q+vslLy8vZJ9+6qmnBADx+eefN/p9pkyZIgCIBx54oN73rvb000/X+I6rBkDIZDKxffv2kO0NPWYb+l1X7ejRowKAePLJJxtUdupY2BWKQlR3P2rIr59AYDAhgJDuIgDw17/+FQBqjMXIzc3FuHHjan2tKVOmhIy32LJlC/bu3YvrrrsO5eXlKCsrQ1lZGWw2G84//3ysXr263u4tJ7+WxWJBWVkZRo4cCbvdjl27dp32s3300Ufo2bMnevToEXzvsrIynHfeeQCAlStXhtTBXXfdFfL8U3/BDcfXX3+NtLQ0XHvttcFtSqUSd911F6xWK3788UcAwLfffgulUombb745GCeTyWr8yhtJN954Y8gvokOGDIEQokb/2yFDhqCwsBBerzf4mYDT7zv/+9//gr90ntz1o7b6/eijjzBy5EjEx8eH/M3Gjh0Ln88X0vrQECNHjoTP58PatWsBBFomRo4ciZEjR+Knn34CAGzbtg1VVVUYOXJko177VLfddluN9y4vLw8ek7VpyvHakP3oVEIIfPLJJ5g4cSKEECF1O27cOJhMJmzatCnkOU3dL4DQY9dkMqGsrAyjRo3CgQMHYDKZQp7fq1evkLpPTk7GmWeeiQMHDjSoTi677DL88MMPIbfqc1Rj6+uKK64I+RW5oqICK1aswFVXXRU8B5WVlaG8vBzjxo3D3r17ceTIEQCBvuvbt2/H3r17G1TuhigvL0d8fHydj596Xpg5cyaAE8fmf//7X/j9flx11VUhf/O0tDR07949eA6splarMW3atFrf689//nPIAPIhQ4YAAK6//vqQ8UhDhgyB2+0O1ksk/PDDD6iqqsK1114b8jnkcjmGDBlS43PU5ZZbbgnZp2+//XYoFIpgfTXlfW6//faIfMZRo0ahV69ewfuNOWYb+l1XrXqfam1TZFPrwK5QFMJgMAAIXIg3xKFDhyCTydCtW7eQ7WlpaYiLi8OhQ4dCtufm5tb5Wqc+Vv0FW183HpPJVOcX5/bt2/HQQw9hxYoVNS7OTr04qc3evXuxc+fOkAuFk5WUlAA4UQendm05uZtXuA4dOoTu3bvXGMxe3cxfXc+HDh1Cp06dajSpn/r3iaTOnTuH3K++eMjKyqqx3e/3w2QyITExscH7TvW/3bt3D4lLTk6u8bf
fu3cvfv/999P+zRpq4MCB0Ol0+OmnnzBu3Dj89NNPmDdvHtLS0rBo0SI4nc5ggjFixIhGvfapTq3H6s9WWVkZPC5P1ZTjtSH70alKS0tRVVWF119/Ha+//nqtMafWbVP3CwBYs2YN5syZg3Xr1tUYZ2IymUIuUE99HyBQd7X1fa9NZmYmxo4dW+tjja2vU89h+/btgxAC//znP/HPf/6z1vcoKSlBRkYGHnnkEVx22WU444wz0KdPH1x00UW44YYbcNZZZzXoc9RFCFHnY6ceU127doVMJgv23d+7dy+EEDXiqp3axSojI6POweqN2R8ANPjv1xDV3yXVF8qnquv4OtWp9RATE4NOnTqF1Fdj3kehUCAzM7NB7306p+57jTlmG/pdV616n2rr6yBR82BiQSEMBgPS09Oxbdu2Rj2voSeY2maAquux6taIp59+Gv3796/1OXX1aa+qqsKoUaNgMBjwyCOPoGvXrtBoNNi0aRP+/ve/N2ggr9/vR9++ffHcc8/V+vipX4gdlVwub9T2Uy90Ivnl5Pf7ccEFF+Bvf/tbrY+fccYZjXo9pVKJIUOGYPXq1di3bx+KioowcuRIpKamwuPx4Ndff8VPP/2EHj161Pml3FANra+T9ejRAwDwxx9/hPXep1N9vFx//fV1JvqnXgA3db/Yv38/zj//fPTo0QPPPfccsrKyoFKp8PXXX+P555+vcew2pd6aS13nsPvuu6/OltrqxPrcc8/F/v378fnnn+P777/Hm2++ieeffx6LFy/G9OnTm1SexMTERl2gn3os+v1+SJKEb775ptZ6PvX8W9/5PdzzRG1OnoijPtV/h6VLl9a6RkmkZnBr7Puo1eqIzXxY177XkGO2sd911ftU9VgPopMxsaAaLrnkErz++utYt24dhg0bVm9sdnY2/H4/9u7dG/wVDwgM9KyqqkJ2dnaTy1HdAmAwGOr8RbEuq1atQnl5Of773/+GzIiSn59fI7auC9uuXbti69atOP/88+u9+K2ug/3794e0UuzevbtRZa5PdnY2fv/9d/j9/pAvououXdX1nJ2djZUrV9aY5nHfvn0Nep+W/AWqoftO9b979+5Fly5dgnGlpaU1Lpq6du0Kq9Xa6P2lPiNHjsSTTz6J//3vf0hKSkKPHj0gSRJ69+6Nn376CT/99FODBjA2R92eccYZOPPMM/H555/jhRdeqDPRrtbQ/ehU1bOd+Xy+iNZtbb788ku4XC588cUXIb9yN7S7SiQ1tb6qVe+vSqWyQfWWkJCAadOmYdq0abBarTj33HMxd+7cYGLR2H2oR48etZ7zqu3duzfkl+59+/bB7/cHBxh37doVQgjk5uY2OimPpPj4+BrrN7jdbhw7dixkW33ncgBISUkJa//du3cvxowZE7xvtVpx7NgxTJgwIaLvU5vG/u0bc8w29LuuWvU+dfJ5m6gax1hQDX/729+g1+sxffp0FBcX13h8//79eOGFFwAgeEI9dbad6l8+wpkZKS8vD127dsUzzzwTnMbzZKWlpXU+t/pXsJN/9XK73XjllVdqxOr1+lq7Rl111VU4cuQI3njjjRqPORwO2Gw2AMD48eMBAC+++GJITCQX3ZswYQKKiorwwQcfBLd5vV4sWrQIMTExGDVqFIDA1KIejyekzH6/v8ELfun1+hZbgKmh+87YsWOhVCqxaNGikL9nbfV71VVXYd26dfjuu+9qPFZVVRXSj7+hRo4cCZfLhYULF2LEiBHBL96RI0di6dKlOHr0aIPGVzRX3c6bNw/l5eWYPn16rZ/v+++/x1dffQWg4fvRqeRyOa644gp88skntbZm1ncsNlZtx67JZMI777wTsfdoqKbWV7WUlBSMHj0ar732Wo2LYCC03srLy0Mei4mJQbdu3UKm+6xe46Ch+9GwYcOwbdu2WqcMBVDjvLBo0SIAJ85pl19+OeRyOebNm1ejBUEIUaPMzaVr1641xke9/vrrNVos6qqfcePGwWAw4PH
HH691TZeG7r+vv/56yPNfffVVeL3eYH1F6n1q09i/fWOO2YZ+11XbuHEjJEk67Q+P1DGxxYJq6Nq1K9577z1cffXV6NmzJ2688Ub06dMHbrcba9euDU63CAD9+vXDlClT8Prrrwe7H/3222949913MWnSpJBfdxpLJpPhzTffxPjx49G7d29MmzYNGRkZOHLkCFauXAmDwYAvv/yy1ucOHz4c8fHxmDJlCu666y5IkoSlS5fW2ryel5eHDz74ALNmzcLZZ5+NmJgYTJw4ETfccAM+/PBD3HbbbVi5ciXOOecc+Hw+7Nq1Cx9++GFwPY7+/fvj2muvxSuvvAKTyYThw4dj+fLlDW4lqLZ8+fJa55CfNGkSbrnlFrz22muYOnUqNm7ciJycHHz88cdYs2YNFi5cGBy8O2nSJAwePBh//etfsW/fPvTo0QNffPFFcL7x0/0alZeXh//973947rnnkJ6ejtzc3OAgy0hr6L6TnJyM++67DwsWLMAll1yCCRMmYPPmzfjmm29qNMXff//9+OKLL3DJJZcEpxy12Wz4448/8PHHH+PgwYONbr4fNmwYFAoFdu/eHZwqFgh0XaleW6EhiUVz1e3VV1+NP/74A/Pnz8fmzZtx7bXXIjs7G+Xl5fj222+xfPny4Cq5Dd2PavPEE09g5cqVGDJkCG6++Wb06tULFRUV2LRpE/73v/9FbE77Cy+8ECqVChMnTsStt94Kq9WKN954AykpKbVenDencOqr2ssvv4wRI0agb9++uPnmm9GlSxcUFxdj3bp1OHz4MLZu3QogMAh99OjRyMvLQ0JCAjZs2ICPP/4YM2bMCL5WXl4egMBEEePGjYNcLsc111xT53tfdtllePTRR/Hjjz/iwgsvrPF4fn4+Lr30Ulx00UVYt25dcMrsfv36AQh8Fzz22GOYPXs2Dh48iEmTJiE2Nhb5+fn49NNPccstt+C+++5rVJ02xfTp03HbbbfhiiuuwAUXXICtW7fiu+++q3Es9+/fH3K5HE8++SRMJhPUanVwPZRXX30VN9xwAwYOHIhrrrkGycnJKCgowLJly3DOOefgpZdeOm053G43zj//fFx11VXYvXs3XnnlFYwYMQKXXnopgEDreiTepzbVf/t//OMfuOaaa6BUKjFx4sR6F9Rr6DHb0O+6aj/88APOOeec4JgoohAtOQUVtS179uwRN998s8jJyREqlUrExsaKc845RyxatChkKkCPxyPmzZsncnNzhVKpFFlZWWL27NkhMUIEpgasbTrF6ukpP/roo1rLsXnzZnH55ZeLxMREoVarRXZ2trjqqqvE8uXLgzG1TTe7Zs0aMXToUKHVakV6err429/+Jr777rsa0xFarVZx3XXXibi4OAEgZOpLt9stnnzySdG7d2+hVqtFfHy8yMvLE/PmzRMmkykY53A4xF133SUSExOFXq8XEydOFIWFhY2abrau29KlS4UQQhQXF4tp06aJpKQkoVKpRN++fUOmj61WWloqrrvuOhEbGyuMRqOYOnWqWLNmjQAg3n///WBcbVOi7tq1S5x77rlCq9XWmMr1VHX93ar/FuvXrw/ZXv1+paWlwW0N3Xd8Pp+YN2+e6NSpk9BqtWL06NFi27ZtNaabFCIw5ePs2bNFt27dhEqlEklJSWL48OHimWeeCZkqsiF/m2pnn322ACB+/fXX4LbDhw8LACIrK6tGfGPqtrZ6EaL2fbo+y5cvF5dddplISUkRCoVCJCcni4kTJwanwqzW0P2otvopLi4Wd955p8jKyhJKpVKkpaWJ888/X7z++uvBmEjsF1988YU466yzhEajETk5OeLJJ58Ub7/9do36qOucMmrUKDFq1KjT1FjgM9555531xjSkvk43ZfT+/fvFjTfeKNLS0oRSqRQZGRnikksuER9//HEw5rHHHhODBw8WcXFxQqvVih49eoj58+eH7LNer1fMnDlTJCcnC0mSGjT17FlnnSVuuummkG3Vdb5jxw5
x5ZVXitjYWBEfHy9mzJghHA5Hjdf45JNPxIgRI4Rerxd6vV706NFD3HnnnWL37t3BmFGjRonevXvXeG5dddOY/cTn84m///3vIikpSeh0OjFu3Dixb9++Wo//N954Q3Tp0kXI5fIa5/qVK1eKcePGCaPRKDQajejatauYOnWq2LBhQ711WF2mH3/8Udxyyy0iPj5exMTEiMmTJ4vy8vIa8Q15nylTpgi9Xl/v+57q0UcfFRkZGUImk4UcC/Xtxw05ZoVo+HddVVWVUKlU4s0332xU2anjkISIwgg3ImpRn332Gf70pz/h559/xjnnnBPt4hBRC1m6dCnuvPNOFBQUBBezmzt3LubNm4fS0lIOwG2AJUuWYNq0aVi/fn3IL/cd0cKFC/HUU09h//799Q7Wp46LYyyI2hmHwxFy3+fzYdGiRTAYDBg4cGCUSkVE0TB58mR07ty5weOsiOri8Xjw3HPP4aGHHmJSQXXiGAuidmbmzJlwOBwYNmwYXC4X/vvf/2Lt2rV4/PHH+WVA1MHIZLJGTx9OVBulUomCgoJoF4NaOSYWRO3Meeedh2effRZfffUVnE4nunXrhkWLFoUMAiUiIiKKNI6xICIiIiKisHGMBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERNbvrr78eGo0Ge/bsqfHYE088AUmS8NVXXwEAPvjgA1x//fXo3r07JEnC6NGjW7i0RETUFJIQQkS7EERE1L6VlJSgR48e6N+/P1asWBHcnp+fj969e2PChAn4+OOPAQCjR4/Gxo0bcfbZZ2PLli0466yzsGrVqiiVnIiIGootFkRE1OxSUlLw5JNPYuXKlXj33XeD2++44w4olUq88MILwW1Lly6FyWTCihUrkJ6eHo3iEhFREzCxICKiFjF9+nScc845uO+++1BeXo73338f3377LR577DFkZGQE47KysiCT8euJiKitYVcoIiJqMdu3b8eAAQMwadIk/PTTT8jMzMSvv/5aZyLRp08fJCUlsSsUEVEboIh2AYiIqOPo3bs37rvvPixYsAByuRzLli1j6wQRUTvBszkREbWopKQkAEB6ejr69OkT5dIQEVGkMLEgIqIWU1hYiDlz5qBPnz4oLCzEU089Fe0iERFRhDCxICKiFjNjxgwAwDfffIM///nPmD9/Pg4cOBDlUhERUSQwsSAiohbx6aef4osvvsCjjz6KzMxMLFy4ECqVCnfeeWe0i0ZERBHAxIKIiJqdxWLBXXfdhQEDBmDmzJkAAmMsHn30UXz77bf46KOPolxCIiIKF6ebJSKiZnf33XfjpZdewi+//IKzzz47uN3n82Hw4MEoKirCrl27EBsbi9WrV2P16tUAgEWLFkGn0+Gmm24CAJx77rk499xzo/IZiIiofkwsiIioWW3cuBFDhgzB7bffjkWLFtV4fP369Rg6dChmzJiBF154AXPnzsW8efNqfa05c+Zg7ty5zVxiIiJqCiYWREREREQUNo6xICIiIiKisDGxICIiIiKisDGxICIiIiKisDGxICIiIiKisDGxICIiIiKisCmiXYCW5vf7cfToUcTGxkKSpGgXh4iIiIio1RJCwGKxID09HTJZ/W0SHS6xOHr0KLKysqJdDCIiIiKiNqOwsBCZmZn1xnS4xCI2NhZAoHIMBkOUS0NERERE1HqZzWZkZWUFr6Hr0+ESi+ruTwaDgYkFEREREVEDNGQIQVQHb69evRoTJ05Eeno6JEnCZ599dtrnrFq1CgMHDoRarUa3bt2wZMmSZi8nERERERHVL6qJhc1mQ79+/fDyyy83KD4/Px8
XX3wxxowZgy1btuCee+7B9OnT8d133zVzSYmIiIiIqD5R7Qo1fvx4jB8/vsHxixcvRm5uLp599lkAQM+ePfHzzz/j+eefx7hx42p9jsvlgsvlCt43m83hFZqIiIiIiGpoU2Ms1q1bh7Fjx4ZsGzduHO655546n7NgwQLMmzevmUtGLcXvF7C5vbA4vai0uWFxeSFEtEtFRJESq1EgK14Ho04Z7aIQUSvk8/ng8XiiXYxm5/X5YXd7YXP5YHJ4kGbUIE6napb3UiqVkMvlEXmtNpVYFBUVITU1NWRbamoqzGYzHA4HtFptjefMnj0bs2bNCt6vHtlObYfL64PV6YXZ4UGp1QWbywu3T0AhSVArI3MgEFHrcLTKgTKLC5kJWmTE6aBV8RgnosBaCkVFRaiqqop2UZqNXwgIEfjX7xcQQPDH0zK7DJWy5lt/LS4uDmlpaWGv8damEoumUKvVUKvV0S4GNYIQAlaXF1aXFxVWNyrtbjg8Pvj9gFYlR6xGCbWCFxtE7ZFRq4Td7cW+EiuKzS5kJ+qQZtBAIY/qkEAiirLqpCIlJQU6na5dLHIshIDPL+A/6V+/ACQAkgTIjn9Gr19Ao5BDqYj8eVAIAbvdjpKSEgBAp06dwnq9NpVYpKWlobi4OGRbcXExDAZDra0V1Ha4vX5YnB5YnB6UWt2wOj1wef1QymTQqRVIjlFC3oyZOhG1HjqVAlqlHCaHB9uOmFBkciI7UY+kGFW7uJggosbx+XzBpCIxMTHaxQmL3y/gO55I+HwCflmgZUIhB2QyCbWd4mQ+AY2yeRILAMFr6JKSEqSkpITVLapNJRbDhg3D119/HbLthx9+wLBhw6JUImoqIQRsbh8sTg8q7W5U2DxwuL3wCwGtQoFYjRJJbJUg6rAkSUKcToVYjRLlNhe2FlYhzahBVoIORi3HXxB1JNVjKnQ6XZRL0ngnt0p4fYGk4uRWCYVMCtyJsuq69Xg8bTexsFqt2LdvX/B+fn4+tmzZgoSEBHTu3BmzZ8/GkSNH8H//938AgNtuuw0vvfQS/va3v+Evf/kLVqxYgQ8//BDLli2L1kegRnB7/bC6vDA73CizumF1euHy+iCXyaBXKZAco2GrBBGFkMskpMRq4PL6cKTKgTKrC1nxOmTEa6HhGCuiDqWttFie3Crh9QWSCgEBGSRIMgnKVtizM1J1G9XEYsOGDRgzZkzwfvUg6ylTpmDJkiU4duwYCgoKgo/n5uZi2bJluPfee/HCCy8gMzMTb775Zp1TzVJ0CSFgd/tgcXpRYXcFWiVcPvjhh1ahgF6tQGIMx78Q0empFXKkG7WwubzYW2JBsdmJ7CQ9UmPVHH9BRFEV0irhP/F/CYGuTXKZ1GaSonBJQnSsyTrNZjOMRiNMJhMMBkO0i9PueHx+WJzewFgJiyvQKuHzQS4FWiW0KjlbJYgoLEIIVDk8sLt9SI5VITtRj0Q9x18QtVdOpxP5+fnIzc2FRqOJdnEAnNIq4a+exSmQTMhkEiJ5qePxCeiacYwFUH8dN+bamT/zUFiEELC5vCgyObHzmBm/5Vdg48FK7Dxmhs3lg16tQLpRh1SDBjEaBZMKIgqbJEmI16mQZtCgyubBloIq7DxmhtnZ/ue2J6LoEELA6/fD7fUF1pdwe2F3e+H2+iGEgFwmQSmXQSFvXFJx6OBB6NVKbN26pcHPWbJkCeLi4hr9GVpCmxq8Ta2Dx+eH1RlYpK7U6oTl+FgJmSSDXiVHcqyaCQQRNTu5TEKKITD+orDCgVKLC1kJOqTHcfwFEYXvdK0SSl7r1MDEgk5LCAGHxxdc7brc5obD7YPPL6BWyKBXK5CgYzcEIooOtUKO9DgtrC4v9hRbUGJ2oXNioKWUP3IQUUMJcXzWJj/g9fuDYyWAQCtERxor0VTsCkW18vr8qLS5UVhhx+bCSvyWX4EthVUorHRAApAUo0Z6nBaJMWpolHIeaEQUdTFqBToZtXB7/fj9cBV+P1yFcqsLHWw
oIVHHYrPVfXM6Txvrt1jhMVvgNFthc/tgd/ng8HjhNVsh2e1QOh1QOu2QO+yQ7LYmFfH7777D2DGjkJ6ShKxOqbhi0mU4sH9/rbGrf/wRerUS3379NQbnDUCCIQZjR43Atm3basR+99136NmzJ2JiYnDRRRfh2LFjwcfWr1+PCy64AElJSTAajRg1ahQ2bdrUpPI3BhMLAlA9g5MXxWYndhcdHytxqBLbj5phtnuhVynQyaBBmkGDWA0XqyOi1kkmSYjXq5Aaq0G51Y3NhVXYWWSGheMviNqnmJi6b1dcERqbklIjRmaIhdJogOKSi0PGSsT27I6YxDjoEkJvTWG32zDz7nvw09pf8NW330Emk+Gaq66E3++v8zn/mP0AFjz5NFavXYfEpCRc/qfLgut5BF7TjmeeeQZLly7F6tWrUVBQgPvuuy/4uMViwZQpU/Dzzz/jl19+Qffu3TFhwgRYLJYmfYaGYleoDszrC6wrYXF6UWZ1wezwwOHxQy5J0KnkSNSrOI0jEbVJCrkMqQYNnB4fCivsKLW4kJ2gQ6c4LdRcfJOoQ6keK6FA3WvRVU8L2xwm/enykPuvvv4GsjM6YefOHYjRx9T6nNkPPYTzx44FACx+42306p6LTz/9FFdddRWAwEJ2ixcvRteuXQEAM2bMwCOPPBJ8/nnnnRfyeq+//jri4uLw448/4pJLLonYZzsVE4sOxnF8tesquwdlVhfsbh+8fgGNQgadSoF4nYzdmoio3dAo5Ug36mB1erGryIoiswvZiTqkxHL8BVG7YLXW2CREYGyET5LD6/YeHysB4NARyKRAy2aNSx1Z6A+pjj37ECn79u7Fo4/Mw4bffkN5eVmwpaKwoBA9e/as9TlDhgwN/j8+IQFnnHEmdu7cGdym0+mCSQUAdOrUCSUlJcH7xcXFeOihh7Bq1SqUlJTA5/PBbreHrA/XHJhYtHO1tUo4vYFWCa2SrRJE1DHEaBTQqeWosnvwe2EVUg0adE7UI16n5I8pRG2ZXg8AgUTi+MxN1YvUCQCST0AmSVDKARhqbx2o73Uj4c+X/wlZnTvjpVcXo1OnTvALP84e0B8et7vJr6lUKkPuS5IUMp5sypQpKC8vxwsvvIDs7Gyo1WoMGzYM7jDesyGYWLRD1a0SJocHZRY3bG4vvH4/NAo5WyWIqMOSSRIS9Cp4fAqUWd0ot7uRYdQiK0EHvZpfh0RtSbBVwg/4/P7AdLDHr6tPzOAU3TICQHl5Ofbs2Y2XXl2Mc0aMAACsXfPzaZ/322+/IqtzZwBAVWUl9u7dU2frRm3WrFmDV155BRMmTAAAFBYWoqysrAmfoHF4Jm0HfH4Bq9MLs9ODCpsbVQ43nB4/ZBKgVSqQoFdByVYJIiIAgPKk8RcHy20osQS6R3UyaqFqxpVtiSg8fiHg9fmD60oEWyUQ+OFAKUPdgyiiJD4+HomJiXj7rTeQlpaGwsJCPPzQg6d93oL585GQkIiU1BTM+ec/kZSUhEmTJjX4fbt3746lS5di0KBBMJvNuP/++6HVasP4JA3DM2gb5fT4UGJxYm+xBesPVmD9oQpsP2pChc0NjUJ+fAYnLYxaJZMKIqJaaJRyZMTpoJBJ2HnMjE0FlSgyOeH3c3paotZACAGL04MSixMerx8Otxd2tw9Orx/+4AxOUmC161aYVACATCbDkqX/xpZNm3H2wP74+/1/xfwFT572eY8+Nh/3/3UWRgwdgpLiYvz308+hUqka/L5vvfUWKisrMXDgQNxwww246667kJKSEs5HaRBJdLAJvs1mM4xGI0wmEwwGQ7SL02DVrRIWlwfl1uOtEm4fZDIJWqUCOpWcCQQRURP5hUClzQ23zx8Yf5GgQ7y+4V/iRBQZbq8fFqcHFqcHpVY3rE4PvG4XkmBG5+wcaDWaVplARMrqH3/E+AvH4khxKeLi4gAAHp+ATimHshlbVJ1OJ/Lz85GbmwuNRhPyWGOundkVqhVzenwwOz0wOzwos7p
hdQXGSqjlcuhUcsRrudo1EVEkyCQJiTFqeHx+lFpcKLe5kBmnQ0a8luMviJqREAK242NDK21uVNg9cLi98AsBrUKBWI0SKo0MwmKBTEK7TiraA54tWxGfXxyfwSnQKmFyBA4uSZKgUymQoONYCSKi5lQ9/sLh9uFAmQ0lFic6J+iQxvEXRBHj9gZmrDQ73Ci1umFzeuHy+iCXyaBXKZAcEzodtPD6olhaagwmFlHm9PhgcXphcrhRZg3M4OTx+aGSyaFXy2HUaiFjqwQRUYvSquTIUGlhcniw85gZRWYnshP1SI5RQ8b1L4gapbpVwur0osLuQoXNA4fLBz/80CoU0KsVSIxRR7uYrcK5o0bB5vKcPrCVYmIRBX6/QJHZiVKLC2anBw53IBPXKgPdm9gqQUTUOhi1SsSoFaiyu7G1sAppxsD4izgdx1+0V36/gNXthd3lQ2DOIQqH2xvoXmh1euHy+SCXjrdKxKq5SGU7xMQiCuweH/YUW+D1CcSoFTAYlGyVICJqpeSyE+Mvis1OlFtdyIwPjL/Qqfg12h64vMd7D9g9KLO6Ar0HvH6OY4wAAQFNdauEMrxWiQ4231CLql4NPFw8I0aJEILrSxARtSFKuQxpBi3sbi/2l1hRbHEhO0GHNKOG5/I2xu8XsLm9sDi9KLe5YLJ7YHf7IBDoPWDUqDimpjWRK+EHUFJ0DAlJSVAqO1aLodcvIPPJ4WuGfVIIAbfbjdLSUshkskZNaVsbJhZERESNoFMpoFXKYXZ6sf2o6fj4Cx2S9Bx/0Zq5vIE+/maHB6VWF2wuL9w+AYUsMEFKKnsPtFqSJEFuSIPDVoFjR49FuzgtzicEVHJZs3Yd0+l06Ny5M2Sy8JIXJhZERESNJElScPxF5fHxF6kGDbIT9DDqlNEuHiHwS2xgpkUvKm1uVNrdgVYJERicH6tRQq2QR7uY1ECSXAF5bDIg/IC/Y80SVWFx4swUA5Jjm2eAu1wuh0KhiEjXPyYWRERETSSXSUiKUcPt9aPI5ESF1Y3MeC0y4nXQqnjR2tKqF1czOzwoswUWV3N5/VDKZNCpFUiJVXLAcBsmSRIgyQFZxzq2fDIflCp1jYXrWiMmFkRERGFSKWToZAyMv9hXenz8RaIOqQaOv2hOJy+uVmF1o/L4+k9CABpFoFUiia0SRC2GiQUREVGEVI+/MDk82HbEhCJTYP2LpBgVZxiKkLoWV1PIZNDVsrgaEbUcJhZEREQRJEkS4nQqxGqUqLC5saWwCp2MGmQl6GDUcvxFY9W6uJrbC78QXFyNqJVhYkFERNQM5DIJybGB8RdHqhwos7qQGa9FZrwOGiW759TH4/PD4gy0SpRZ3YHF1bw+yGXHF1djqwRRq8TEgoiIqBmpFDKkG7WwubzYW2xFidmF7CQ9UmPVUHD8BYBAq4TdHVikrsLuQqXNA7vLB5/wQ6tkqwRRW8HEgoiIqAXo1QroVHJUOTz447AJRbEqZCfqkajvmOMvPD5/YF0JpwelFhesruOtEpIMOpUcybFqtkoQtTFMLIiIiFqIJEmI16lg0AiU21yotFUhPU6DzAQdDJr2Pf6iulXC6gqsK1Fuc8Ph9sHnF9Ao5dCrFEjUs1WCqC1jYkFERNTC5DIJKbEauLw+FFY4UGpxITNeh4x4bbsaf+E9PlbC4vSi1OqE5fhYCZkkg14lR1IMWyWI2hMmFkRERFGiVsiRHqeF1eXF3hILSi0udD6+/kVbvOAWQsDh8QVXu66wBVa79voFNAoZ9GoFEnQds+sXUUfAxIKIiCjKYtQK6FVyVNk9+P1wFVINGnRO0CGhDYy/8PoC60oEWyUcXjg8fshlEnRKORL1Kg5SJ+ogwkos3G438vPz0bVrVygUzFGIiIiaSpIkxOtViPUpUGENjEFIj9MgK16H2FY2/sLu9sLq9KLS7ka5NbRVQqdSIF4na/UJERFFXpOyAbvdjpkzZ+Ldd98FAOzZswddunTBzJkzkZGRgQceeCC
ihSQiIuooFHIZUgwaOD0+FFbYUWpxITtBh05xWqgV0Rl/cXKrRJnVBbPDA4fHD4VMgpatEkR0XJPOArNnz8bWrVuxatUqaDSa4PaxY8figw8+iFjhiIiIOiqNUo50ow4qmRy7iqzYXFCFYyYHfH7RIu/vcPtQYnZiT5EF6/MrseFgJbYfNcFk90CrVCDdqEGqQQODVsmkgogANLHF4rPPPsMHH3yAoUOHhjR19u7dG/v3749Y4YjqI5dJUMgkeP2ixb5o2xvWYWSwHiOD9Vi7GI0COvXx8ReFx8dfJOoRr1NGtLuRzy+C60qUW90wOd1wevyQSxI0bJUgigq5TEKMWoG20rOwSYlFaWkpUlJSamy32WzsU0nNTq2QIU6vRJxWCZPDA6NWiSq7B1V2D1xef7SL1yawDiOD9RgZrMfTk0kSEvQqeH0KlNvcKLe7kWHUIjNBhxh108c4Otw+WFwemOyBZMLq9sLr80OjkEOrkiNe2/oHjxO1RyefF6vsKsTpVLC7vdCpWveY5iaVbtCgQVi2bBlmzpwJAMGTzptvvolhw4ZFrnREp1ArZMhM0GLxj/uxZO1BmB1eGLQKTBuei1tHdcHhCgcvRE6DdRgZrMfIYD02jkIuQ0psYPzFwXIbSiwuZCfqkGbUNGj8RXWrhMUVSCSqHIFF6uQyCVplYCpYJVsliKKqvvPiHaO7Qt2K17ppUmLx+OOPY/z48dixYwe8Xi9eeOEF7NixA2vXrsWPP/4Y6TISBcXplVj84368uHxfcJvZ4cULy/cCACYP6YxisytaxWsTWIeRwXqMDNZj02iUcmTE6WBxerDzmBnHTE7kJOqRHFtzwTmnxwez0wOzw4MyqxtWlxdevx9quRw6tkoQtTqnOy/eOqpLq225aFKpRowYgS1btuCJJ55A37598f3332PgwIFYt24d+vbtG+kyEgEI9DM0apVYsvZgrY+/szYft47qgstfWYNKu6dlC9dGxOuU+H7WKNZhmFiPkdGQerxzTFeUWd0cc1GHWI0SerUClTZ3yPoXMpkES/VYCYcHDrcXkiRBp2KrBFFrJpdJiDvNtc6dY7q1bKEaocnpTteuXfHGG29EsixENfj8AnuLLdhwqBIVNjcentgLZoe31lizw4sKmxt6tRKHq5wtXNK2ITNeh3Krm3UYJtZjZDSkHk0OLxQyiYlFPWSShMQYNTw+P0otLpTZAi08Ht+JVok4rZatEkStmMPtw9bDgZnfpo3Irfe8aHF6kBijbuESNkyTEouCgoJ6H+/cuXOTCkMEACaHBxsPVWLjoUpsLqiExRU4uBL0KiTGqGDQKmo94AxaBZJj1fjbuDPh8fEipDZKuYQUg5p1GCbWY2Q0pB5jNQqUmJmcNYRSLkOqQQOX1weZJLFVgqgVE0LgcKUDGw9VYsOhCmw/aobXL5CgV+G+cWee5rzYuhbMPFmTEoucnJx6f/nw+XxNLhB1PH4hsK/Eig0HK7DhUCX2lVhx8qWYXi3HgKx4DMqOR5XNg2nDc4P9DE82bXguTHYPUgyaGo/RCSYH6zASWI+RUV89ThmWg9V7SvHKqv2YPiIX2Yn6KJSw7YnWInpEVD+nx4ffD5uw4VAFNh6qRIkldPxYmkGDvOx4HK1y1Pv94vX7oWraUnTNrkmJxebNm0PuezwebN68Gc899xzmz58fkYJR+2Z2eLCpoBIbCyqx6VAlzM7QrLxLkh552fHIy45HjzRDcDCi1eXFraO6AAj0M6xtBhmqX5XNwzqMANZjZNRXjzeNyMWfF6/F7mIr7np/My7q0wnXDe4Mo7b1/lpHRFRNCIGjVc5gIrHtqCmkFVshk9Anw4hBx693MuICXRb9ftR5Xmzts0JJQoiItdMvW7YMTz/9NFatWhWpl4w4s9kMo9EIk8kEg8EQlTJYXV6szy+HUdtxBtD5hcD+Eis2FgRWb91TbAlpldCp5BiQFYe87HgM7Bxfb99BtUKGOJ0ScTolTA4vjFoF57x
vJNZhZLAeI6O+ejxUbsfba/Kx7kA5gEAL5nWDO2NCn05crI2IWh2X14c/jpiw8WAlNhyqRNEpXTlTYtXIyw70wjgrMw6aOpKEk8+LVQ4P4rQqeP3+qMwG1Zhr54gmFvv27UO/fv1gs9ki9ZIRx8Si5VidXmwuDCQSmwoqUeUInR0nJ1GHvOwEDMqOR4+02EZfJHCV3vCxDiOD9RgZ9dXj74er8MZPB3Cw3A4AyIzX4qYRuRiUnRCNohIRBR2tCoyV2FhQiT8Om+D2nfhhSSGT0DvdcDyZSEBmfOMmUpDLJFTa3eiWEoPUKHWvbcy1c5PSHrPZHHJfCIFjx45h7ty56N69e1NektoBIQQOlNmw4fjA691FZpx8baBVytH/eKtEXnY8ksKc0cDHi7iwsQ4jg/UYGfXV41mZcVh49QD8sKMY//r1EA5XOjDvyx3Iy47HTSNykRWva+HSElFH5fb6se2I6XgvjAocNYW2SiTFnNwqYQyrlcHnF7C6vIhcM0DzatInjYuLq5FtCSGQlZWF999/PyIFo7bB6vJiS2EVNh7vP3jqnP2dE3TBvoM9OxnadQsNETUvuUzCRX3SMLJ7Et5fX4ivfj+KjYcqsaWwChf37YRrz+6MGE3rXDSKiNq2IrMzMIPTwQr8fsQE90ndXeUyCb06GYLXO50TdB12eucmnYFXrlwZcl8mkyE5ORndunWDQsGTensmhMDB8hOtEjuPhbZKaJQy9Ms80SqREstZcYgosvRqBW4akYvxfdLw1s/5+O1gBb7YehQrd5dg8pBsXNQ7rcbq00REjeHx+bH9qBkbDlZgY0ElDleGTsiRoFcFE4n+WXGtdiXslhbRMRZN9fLLL+Ppp59GUVER+vXrh0WLFmHw4MG1xi5ZsgTTpk0L2aZWq+F0Nmyec46xaDy7O9AqseFQYAancps75PGseG2w72CvdLZKEFHL2lRQiTd/zkdhRWD8RXaCDtNHdkH/rLjoFoyI2pQSsxMbCwI/nG49XAWn50SrhEwCenYyBLs45STqW6xV4qjJgX6ZcUgztqMxFl988UWDC3DppZc2OPaDDz7ArFmzsHjxYgwZMgQLFy7EuHHjsHv3bqSkpNT6HIPBgN27dwfvd9TmpuYihEBBhf34oi2V2HHMHNLvWaWQoV+mEYOyE5CXHR+1wURERAAwsHM8Fl0Th2+3HcO/fy3AoQo7/vn5NgzJTcBfzslFepw22kUkolbI4/Nj5zFzsBdGwfEfJ6rF65TBH077ZcUhRs1WidNpcIuFTNawX6ElSWrUAnlDhgzB2WefjZdeegkA4Pf7kZWVhZkzZ+KBBx6oEb9kyRLcc889qKqqatDru1wuuFwnFiAxm83Iyspii8UpqpeSrz64yqyhi7ZkxGmD3Zv6pBuhUrSOchMRnczi9OA/vxVg2R/H4BeBGVkm9kvH1YOyoOdFAVGHV2Z1BWZwOj4+y+E5cc0qk4Az0060SuQm6SFrBT9et8sWC78/8nOyu91ubNy4EbNnzw5uk8lkGDt2LNatW1fn86xWK7Kzs+H3+zFw4EA8/vjj6N27d62xCxYswLx58yJe9rauein56kVbqpeSr6aSy9A388SiLZ2M/MWPiFq/WI0St5zbFRf16YS3fj6ATQVV+HTzEazcVYLrh2ZjbM9Ujr8g6kC8Pj92FlmOJxMVwSmrq8VplRjYOR6DcgJjJWI1XIAzHFH9+aasrAw+nw+pqakh21NTU7Fr165an3PmmWfi7bffxllnnQWTyYRnnnkGw4cPx/bt25GZmVkjfvbs2Zg1a1bwfnWLRUcUWEr+RKvEqUvJdzJqgq0SfTOMUCta78qORET16Zygw9yJvbHhUCXe+jkfR6oceGnlPnz9xzHcPLIL+mQYo11EImom5VYXNhUEunNvKayC3X2iVUICcEZqLAblxCOvczy6psS0ilaJ9qLJiYXNZsOPP/6IgoICuN2hg3nvuuuusAtWl2HDhmHYsGHB+8OHD0fPnj3
x2muv4dFHH60Rr1aroVaHt15CWyWEwJHji7ZsOFSJbUdMIa0SSrmEvhnGYP9B9kMmovZEkiScnZOA/llx+PqPY/jPbwU4UGbD7E//wDldEzH1nFykcYwYUZvn8wvsKjIHuzgdKAtdqNmgUWBgdiCRGNA5HkYtWyWaS5MSi82bN2PChAmw2+2w2WxISEhAWVkZdDodUlJSGpxYJCUlQS6Xo7i4OGR7cXEx0tLSGvQaSqUSAwYMwL59+xr9Odojp8eHbUdMwVaJ2paSH5QTWO26b4axzqXkiYjaC6Vchsv6Z2D0mSn496+H8N32IqzZX47fDlZgUv8MXJmXyakiidqYSps7OIPT5sJK2FyhrRLdU2OQ1zkeg3IS0DU5hl0gW0iTzqT33nsvJk6ciMWLF8NoNOKXX36BUqnE9ddfj7vvvrvBr6NSqZCXl4fly5dj0qRJAAJjOZYvX44ZM2Y06DV8Ph/++OMPTJgwoSkfpV04WuUIJhLbjtRcSr7P8VaJvOx4ZMY1bil5IqL2wqhV4o7R3TC+Tye8+fMB/H7YhI82HsbynSW4cVg2xvRIYZcIolbK5xfYU2w53gujAvtLQ1slYtUKDDg+VmIgWyWipkmJxZYtW/Daa69BJpNBLpfD5XKhS5cueOqppzBlyhRcfvnlDX6tWbNmYcqUKRg0aBAGDx6MhQsXwmazBdequPHGG5GRkYEFCxYAAB555BEMHToU3bp1Q1VVFZ5++mkcOnQI06dPb8pHaZOql5LfcKgCGw5V4tgpS8knx6qDg67PyoiDVsVWCSKiarlJejx2WR/8kl+Bd9bk45jJiYXL92LZ8fEXPTtFZ8ZAIgpVZXdjU0EVNh6qwOaCKlhc3pDHuyXHIC8nHoM6x6N7aixbJVqBJiUWSqUyOP1sSkoKCgoK0LNnTxiNRhQWFjbqta6++mqUlpbi4YcfRlFREfr3749vv/02OKC7oKAgZKrbyspK3HzzzSgqKkJ8fDzy8vKwdu1a9OrVqykfpc0oMjmx8XgicepS8gqZhF7phmCTX1Y8WyWIiOojSRKGdUnEoOx4fLH1KD5YX4i9JVb87ZPfcW73ZEwdnoPk2I45Po8oWnx+gX0l1uD1zr4SK05eE0GvlmNAVmAq2IGd4xGvV0WtrFS7Jq28feGFF2Lq1Km47rrrcPPNN+P333/HXXfdhaVLl6KyshK//vprc5Q1IlrLyts7jpqgUcjrTAA8vkCrRPXA6yNVoUvJJ8WokNc5Hnk5CeiXaWT/YCKiMFTa3Vj6yyH8b0cxBAILgV4xIAOXD8zkWLQOSC6ToJBJ8PpFyAKx1DgNqUeTw4PNx8dKbCqohNkZ2irRJUkfmGQmJwFndtBWiba0jkWTEosNGzbAYrFgzJgxKCkpwY033oi1a9eie/fuePvtt9GvX78mF765RTuxcLi9kMtkqLK7YdQpYbJ7UGX3wOX1B5eS33CwEr8fCV1KXi6T0DMtFoNyEpDXOR7ZiTq2ShARRdi+Eive/PkAth81Awj8iDNlWA5GnZHMc24HoFbIEKdXIk6rhMnhgVGrRNVJ39PUMPXVo8Pjw/4Sa3Bs6J5iS0irhE4lx4CsOOQdb5VIjGHLYbtPLNqyaCYWLo8Pr6zaj3fW5sPs8MKgVWDq8BzcNKIL7vz3Jvy8rywkPkGnQt7xeZb7Z8Vx1VgiohYghMCa/eV4Z01+cL2fHmmxuHlkF5yRGhvl0lFzUStkyEzQYvGP+7Fk7cHg9/S04bm4dVQXHK5wMLlogLrqcerwHNx0Ti6mvrMemwurQp6Tk6hDXnZgxsoeabFQyGW1v3gH1e4Ti8ceewyTJ09Gbm5ukwsZLdFKLBxuLxb/eAAvLN9b47GZ53VD3wwjbvvXRvRIM2BQdmBWg5xEPX8hIyKKEpfXh8+2HMXHGwuDLcjnnZmCG4dl81fUdijVqMa/fjmEF5fXnL7+rvO74YKeqfhgw+EolKxtuXpQJn7YUYwXV9Ssx+rrnbvf34L+x1s
l8rLjkcTjqV7tPrHo168ftm3bhiFDhuD666/HVVddhaSkpCYXuCVFK7Fwe/0YNP8HmB3eGo8ZtAr89uBY/FFogoYzOBERtSrlVhf+b90hrNhdAgDQKGW4Mi8Lk/qnQ63gObut8/kF8susGN+3EwY//r86v6d/mX0+Rjy5EhU2dy2vQgCQoFfh57+PwdAFy+usx/UPjsXuIgt/OG2EtpRYNKlvzdatW7F9+3b8+9//xjPPPIN77rkHF1xwASZPnoxJkyZBp9M1qeDtmcXpqfUgAwCzwwuL0wujTslmViKiViYxRo17LzgDF5/VCW/8dAC7iiz41y+H8P32Ikw7JxfndE3kRVIbU2FzY9OhSmwoqMSWgkpkxuswKCeh3u9pk92Dy/ql4+gpk6nQCelxWlTZ67/eMTu90CjlvN5pp5rcab937954/PHH8fjjj2PNmjV47733cM899+C2226D2WyOZBnbhViNEgatos4M3qhVoPR4X14iImp9zkiNxVNXnIUf95Ti3XUHUWJx4clvd6F3ugE3j+yCrskx0S4i1aF6cbUNxxdXO3DK4moOjxdJMep6v6cTY1T486AszhJVD7lMQlKMitc7HVhERgPr9XpotVqoVCpYLJZIvGS74/P7MW14bq1jLKYNz0WV3cOTFRFRKydJEkafmYKhXRLx302H8cnmI9h+1Ix7P9iCsb1SccPQbMTrOLd+axBYXC0wZfvmgipYT11cLSUmuJhs95RYWFwefk+HyecXqHKwHjuyJicW+fn5eO+99/Dee+9h9+7dGDVqFObNm4crr7wykuVrN7QqBe4Y3RUAQmaFOnm2CSIiahs0SjmuG5KNC3qlYcnag1i9txQ/7CjGz3vLcPXZWbi0XzqUnNmmRfn8AntLLMH1n/aVWEMej1ErMLBzYMDwgM7xNRLAKpsHt47qAoDf0+FgPXZsTRq8PXToUKxfvx5nnXUWJk+ejGuvvRYZGRnNUb6Ii/Y6Fna3FwqZDFUON4za0HUsiIiobdp5zIzXfzoQvJjtZNRg2jm5GJqbwPEXzah6cbUNxxdXs5yyuFrXZH1wGtMzGrC4mlohQ5xOiTidEiaHF0atgutYNAHrMbLa0uDtJiUW//jHPzB58mT06tUL1U9vKyfOaCcWQMNW3iYiorbFLwRW7irB/607hAp7YOagfplGTB/RBTlJ+iiXrn3wC4F9JVZsrGNxNb1Kjv6d4wNdnDrHI17ftG5pXHk7MliPkdHuEwsAeOutt/D8889j795AH7ru3bvjnnvuwfTp05vyci2mtSQW6/PLYdSq2FRORNTOONw+fLSxEJ9tOQKPT0AmAeN6p2HykGwYtcpoF6/NMTs82FxYhY2HKrCpoAomhyfk8dwkfXCsRI80w2lbJYjamraUWDRpjMXDDz+M5557DjNnzsSwYcMAAOvWrcO9996LgoICPPLII015WSIiojZPq5LjxmE5uLB3Gpasycea/eX4ZlsRVu8pxTWDO+Pivp34o1I9/ELgQKkNGw9VYMPxVomTf+zWKuUYcHysRF7neC5WSNSKNKnFIjk5GS+++CKuvfbakO3/+c9/MHPmTJSVlUWsgJHGFgsiImpJfxwx4Y2fDiC/LDDFaUacFtNH5GJQTkKUS9Z6WJ1ebC48MVaiyh7aKpGdoMOgnEAi0bOTAQp+d1IH0u5bLDweDwYNGlRje15eHrze2hdFISIi6oj6Zhjx/FX98b+dxfjXL4dwpMqBeV/twMDO8Zg+IhdZCR1vUVkhBPLLbNhwfKzEriJzjVaJfllG5HVOQF52PJJj2SpB1BY0KbG44YYb8Oqrr+K5554L2f76669j8uTJESkYERFReyGXSRjXOw0juiXhgw2F+HLrUWwqqMSM/1RiQt9OuG5wZ8Rq2vf4C5vLiy2FVcGB19UD3KtlJeiQ1zkeg3Li0auTgS36RG1Qk9exeOutt/D9999j6NChAIBff/0VBQUFuPHGGzFr1qxg3KnJBxERUUelVyv
wl3NycVHvNLy9Jh+/5lfgq9+P4cfdpbhuSGeM79Op3Qw+FkLgYLn9+LoSFdhVZAmZGUitkKFfZlywi1OKITrdPIgocpo0xmLMmDENe3FJwooVKxpdqObEMRZERNRabCmswhs/HUBBhR1A4Ff7m0fkYkDn+CiXrGnsbi+2FlYFuziV20JbJTLitMEZnPpkGPkdSNQAbWmMRZOnm22rmFgQEVFr4vMLfLu9CP/+9VBwgbfBOQm4aUQu0uO0US5d/YQQKKiwB7s3bT9mDmmVUClkOCvDeDyZSIjahRFRW9aWEosmd4UiIiKi8MllEi7u2wmjuifjP+sLsOyPY/jtYAU2FVTikrPScfXZWYhRt56va4fbh62Hj4+VKKhEqcUV8ni6UYO87HgMyk5A7wwD1Ap5lEpKRC2t9ZypiIiIOrAYjQI3j+yCi/qk4a2f87HxUCU+23IEK3eXYPKQzriwV1pUxl8IIXC4yoGNBwOJxLYjJnhPbpWQy9An2CoR3+pbWYio+TCxICIiakWy4nWYO7E3NhyqwFs/5+NwpQOvrNqPb7YV4eYRueibGdfsZXB6fPj9sAkbCyqx4WAFSk5plUgzaELGSmiUbJUgIiYWRERErdKg7AT0z4zD19uO4b3fCpBfZsODn23DsC6J+Ms5uRHtby2EwNEqJzYWVGDDwUpsO2qCx3eiVUIhk9A3wxhY7To7HhlxWkhS+5i9iogih4kFERFRK6WQy3BpvwyMOiMF7/1WgG+3HcO6A+VYf7ACk/pn4M+DMqFTnfgql8skKGQSvH4RMoi6Ni6vD38cMQW7OB0zOUMeT4lVB8dKnJXJVgkiOj0mFkRERK2cUavE7aO6YkKfNLz5cz62FFbh402HsXxXMW4cmoPxfdOQEKNCnFYJk8MDo1aJKrsHVXYPXF5/8HWOmRzYcDyR+OOwCW7ficcUMgm90w3BZCIznq0SRNQ4TCyIiIjaiOxEPR65tDd+OxgYf3HM5MRXfxzD1HNy8M7afCxZexBmhxcGrQLThufi1lFd8P22YqzYXYINBytw9JRWiaSY6laJeJyVaQxp/SAiaiyeQYiIiNoQSZIwJDcRAzvH48utRzGiexLeXpOPRSv2BWPMDi9eWL4XfiHQN8OIL7YeBRDoKtWrkyE48Lpzgo6tEkQUMUwsqM3y+QW8fv/pA4mozZBLEhRcOLRBlHIZ/jwoC11T9Lj3wy21xry77iB+mX0+Lh+QgTPTYtE/K46tEkTUbHh2oTbF5fXB7vbB4fZBJgNXLidqZ7y+wA8GGoUcWpUcWqWcv6jXQyGTYHZ4YHZ4a33c7PDC5vLhtlFdQ8ZaEFHrJYSAwxO43nF5fdAo5IjCEjZNwsSCWjW/ELC7fbC7vPAKAZVcgl6tQFa8FgatEloVZykhak/cXj8sTi/KrC6YnR5UOTyQSxI0Sjn0KjlbM07h9QsYtUoYtIpakwuDVgGjVlFjdWwial08Pn/gh1OPF34BaJQyJOhVSNCrYNAoEatpG5fsbaOU1KG4vX7Y3F443D5IEqBTyZFq1CBBr0KsRgG9SgFZW0ndiahRdCogTqdCVoIODrcPFmdgZqNyqxvlNjd8QkAtl0GnUkCjlHX41gyfX6DK4cG04bl4YfneGo9PG56LKrvntFPPElHLEkLA6fHD7vbC6fVBIZNBr1IgJ1EPo1aJWE3b/PGUiQVFnV8IONw+2NxeeP0CSrmEmOOtErHaQJauVrS9g4uIwqNVBbpDpRg08Pr8sLq8J1ozHB5U2P2QSxJ0Kjl0Hbg1o8rmwa2jugAA3lmbX2NWqMMVjiiXkIgAwOvzw+b2wenxwScENAoZjDolusbEIFajQIxa0ebPY5IQokP9jGE2m2E0GmEymWAwGKJSBqvLi/X55TBqVR12jIDbG8jS7R4fJAQuIOJ0SiTq1YjRKBDDVgkiqkN1/2OL04squxvlVjfsbh+8/sAXdUdszVArZIjTKRGnU8Lk8MKoVdS6jgURtZzQVgk/FDI
JepUCiTEqxOnaTqtEY66d2WJBLSKkVcInoFRI0KkU6BanhVGnRIxawVVdiahBJClw/tCpFEg93pphcXphdXlRanXC7PCi0uGHTJKgU8qhVysgb+c/VLi8fhSbXSizuqGQSSi1uNj9iSgKvMfHStjdgVYJrbK6VULdblol6sPEgpqNx+eHzRXaKpEcq0Zi9cHFVgkiigCFXIZ4vQrxehUy47XB1oxKmxsVNjfKrIGLbLVCBr1aAbWi/bZm+PyCCQVRCxJCwOUNXO9Ut0roVHJ0TtQiThcYG9qRZrdjYkER4xcCTo8PNpcPHp8fCkWgya9rnAZGbeDgYqsEETWnU1szPD4/rM7A2IxSqxMWpxcVdh9kkgx6lRw6VftvzSCiyPL5RfCHU78I/GgRq1Wga6wGMWoFYjXtu1WiPkwsKCyeYJNfYJpDjVKOxBgVkmLViFEHmvz4pU1E0aI8qTUjK0ELu/t4a4Y90JpRanHBJ/zQKhXQqeT88YOIaji5VcLl9UN+vFUiK16LeH3Ha5WoDxMLapSQRVt8PijlgenRuiTr2SpBRK2aJAXWwdGrFUgzBlozLE4vLE4PSi0uWJ1eVNhdkEuy4zNN8YcRoo7K5xewu72wuX3wCz/UCjlitQp0idEEunNrFB12Ap76MLGg0zq5VUIIAa1KgQS9CokxKsRqlGyVIKI2SSk/sQBV5wRdsDWjwu5CpS2QbPjhh0bB1gyijsB5/IdTh8cb+IFBLUdmvBbxx8dK6FRslTgdJhZUw6mtEgqZDDFqBXKT9DDqlDBolPyCJaJ25dTWDLc3sG6G2eFGmdUdaM2wuSA/voiVViXnDypEbVx1q0RgBic/1HI5YjQK5CTpEHt8tWu2SjQOEwsCcMpS8n4BjUp+olVCrUSMhq0SRNRxqBQyJCgCrRnZiQK246uAB8ZmeFBqdcIvBLQKBXRqORfxJGojqlslnF4vZAi0SqTHa5CgU7NVIgKYWHRQ1a0SDrcvsJS8XAa98vhS8jolYtVtY9EWIqLmJklScDKKTkYt3F4/LE5PYGyG1Q2L04NyrwsKWWBxPrZmELUePv+JdbR8/sBYiRiNAtmJWhi0KsSoFVAp2CoRKUwsOpDqpeQdHi/8AtAoZYjTBVolDBq2ShARNYRKIUNiTGBNnuxEAasrsDhfhdWNSsfx1gw/oFXK2ZpBFAUurw92lw8OrxcySYJWpUB6vOb4WAkl9GyVaDZMLNqx0KXkA2Ml9CoFshP0bWopeSKi1kqSpON9sZXoZNTC5fXB6vTC7PCgzHa8NcPnhkKSoFMHpqTkDzhEkVXdKmF3e+H1+6FSyBCjUaJzojZ4fLJVomV03MTCZgPktVxUy+WARhMaVxeZDNBqmxQrc9ghEx5ItQ0KkiQIre7EXYcdEHWspHpKrM9mg8PpgcPjg18AaoWEeK0SiUY1YjQKxCQYTyza4nAANn/dZdbrT/zf6QR8vsjE6nRA9S8FLhfg9UYmVqsN1DMAuN2AxxOZWI3mxL7SmFiPJxBfF7UaUCgaH+v1BuqiLioVoFQ2PtbnC/zt6qJUBuIbG+v3B/a1SMQqFIG6AALHhN0emdjGHPctdI6Avf7jHjpd02IdjkA91+XkY7kxsTxHAADUfh/UkheJOgnZGhVsbhmsx9fNqLI6UCIpIWRyaFVy6CUBtai7vEIVeo6QPHWfI0JivV5I7rqPe6EMPUc0ONbng+Sq57hXKCFOOkc0ONbvh+Ss77hvRKxcAXHScS856jvuGxErk0OcdNxL9vqO5UbESjKIk477RsU24tqgcbEOQNR93AudvmmxTifgr/u4b1SsVgeXzw+7ywenzQ653wetUoZOusC4KP3xrouSJAE6Vas6R7TJ64j6vrtOJToYk8kkAAhT4BCreZswIfQJOl3tcYAQo0aFxiYl1R07aFAwzOL0CHt6Zp2xjjN6iN8Lq4I3xxk96ox1ZWaJX/eXi5U7i8U3fxwTVb371V2GpKTQ8o4aVXesThcaO2F
C3bGn7kZXXll/rNV6InbKlPpjS0pOxN5xR/2x+fknYu+7r/7YbdtOxM6ZU3/sb7+diH3qqfpjV648EfvSS/XHfvXVidh33qk/9sMPT8R++GH9se+8cyL2q6/qj33ppROxK1fWH/vUUydif/ut/tg5c07EbttWf+x9952Izc+vP/aOO07ElpTUHztlyolYq7X+2CuvFCHqi22Bc4QQQojs7Lpje/UKje3Vq+7Y7OzQ2EGD6o7lOeLErZnOEVVffycOlFjErwfKxK6HHq83Nn/JB8HvgYJnX6439uCrS4KxB19dUm9swbMvB2Pzl3xQb+zhR58Oxu7/8Mt6Y4/+45Fg7N4vV9QbW3Tv34Oxu/+3rt7YkltnBmN3rt1ab2zZjdODsdu37Ks3tuLKa4Oxf+w+Uv/f7eLLQr6X64s1nXdhSKxPW/c5wjL0nJBYT0JinbG2swaExLoys+qMbex1xMmxtrMG1BnrSUgMibUMPafOWJ9WFxJrOu/Ceuvt5Niqiy+rN/aH3/aLFTuLxG/55cJ89XX1xra1c0RrvI4wAQKAMJlM4nQ6botFO+HzCzg8Xhh0CnSN0UCv5p+UiKi1MmpVMCbHIDtRD1ei/vRPIKIaeqcbEJNohF6lgEyjjHZx6CSSEEJEuxAtyWw2w2g0wnT0KAwGQ82AFujmYHV5sXFHIYwa5YluSSc7pVkSdhvcHt/xpeQF5DJAp5IjXq9CnF6N2ATDiaXk2c0h8H82YTY+ll2hTtxnV6jGx/Ic0fjYk457lzewOJ/Z7kG5zQWb2wu3TAW5WhlYN0PyQ+6t+3XZFeo4doU6/mDb7ArlF4GxEg6LFT6PDwo5EKNWIEmvRow2sK6EWiHvkOeIWrXQdYS5tBTG9HSYTKbar51P0nETiwZUTnOxurxYn18Oo1ZV58IroUvJC6gVMsRqFEjmUvJERO2a3y9gdXthcXpRbnXB5PDA7g5ckOmUcujVPP9T++H2+mFze+Hw+CAB0KrkiNMpkag/PjZUpYCMEx5EVWOunVvFmenll19GTk4ONBoNhgwZgt9++63e+I8++gg9evSARqNB37598fXXX7dQSZuX0+NDhc2No1UOlFldEACy4rXonxWHs3MSMLBzPDon6hCvrzshISKitk0mk2DQKJERp8VZmSfO/92SY6BUyFDpCHxPlFpcsLu98Hes3wepjfMLAZvLixKLE0erHDA53VArZOiWHIOB2fEYnJuAvhlxSI/TwqBRMqloY6LeIf+DDz7ArFmzsHjxYgwZMgQLFy7EuHHjsHv3bqSkpNSIX7t2La699losWLAAl1xyCd577z1MmjQJmzZtQp8+faLwCZrO5xdwejwnlpJXyBGjDiwlX72uBBMIIqKOTaOUQ6OUIzlWjdwkPSzH180oswRaM0yOQLcKLVszqJXy+Pywubywe463vKkC+3NiTGC1a7ZKtB9R7wo1ZMgQnH322XjppZcAAH6/H1lZWZg5cyYeeOCBGvFXX301bDYbvvrqq+C2oUOHon///li8eHGNeJfLBddJ/cjMZjOysrKi3hVq48EKuL0COrUc8Xoll5InIqJGc3oCYzNMDjfKrG7Y3F54fH5I4PcItRICUCgk6FUKJMeoYTg+VkKj5DpabUVjukJFtcXC7XZj48aNmD17dnCbTCbD2LFjsW7dulqfs27dOsyaNStk27hx4/DZZ5/VGr9gwQLMmzcvYmWOBK1Sjm6psdAq5YhlqwQRETVRaGvGiVXAfT52j6LWQS6XEKNWIFbNVomOIKqJRVlZGXw+H1JTU0O2p6amYteuXbU+p6ioqNb4oqKiWuNnz54dkohUt1hEk1wmISNOe/pAIiKiBpLLJBi1Shi1nH6TiKIj6mMsmptarYa6ejo5IiIiIiJqFlHtg5OUlAS5XI7i4uKQ7cXFxUhLS6v1OWlpaY2KJyIiIiKi5hfVFguVSoW8vDwsX74ckyZNAhAYvL18+XLMmDGj1ucMGzYMy5cvxz333BPc9sM
PP2DYsGENes/qsepmszmsshMRERERtXfV18wNmu9JRNn7778v1Gq1WLJkidixY4e45ZZbRFxcnCgqKhJCCHHDDTeIBx54IBi/Zs0aoVAoxDPPPCN27twp5syZI5RKpfjjjz8a9H6FhYUCAG+88cYbb7zxxhtvvPHWwFthYeFpr7OjPsbi6quvRmlpKR5++GEUFRWhf//++Pbbb4MDtAsKCiCTneixNXz4cLz33nt46KGH8OCDD6J79+747LPPGryGRXp6OgoLCxEbG9vi07pWDxwvLCyM2lS31PZxP6JI4H5E4eI+RJHA/aj1E0LAYrEgPT39tLFRX8eiI2nMPMBEdeF+RJHA/YjCxX2IIoH7UfvCBRSIiIiIiChsTCyIiIiIiChsTCxakFqtxpw5c7iuBoWF+xFFAvcjChf3IYoE7kftC8dYEBERERFR2NhiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREREREYWNiQUREze7666+HRqPBnj17ajz2xBNPQJIkfPXVVygvL8fTTz+Nc889F8nJyYiLi8PQoUPxwQcfRKHURETUGJIQQkS7EERE1L6VlJSgR48e6N+/P1asWBHcnp+fj969e2PChAn4+OOP8dVXX+Hyyy/HhAkTMGbMGCgUCnzyySdYuXIlHn74YcybNy+Kn4KIiOrDxIKIiFrEG2+8gVtuuQVLlizBlClTAADjx4/H2rVrsWPHDmRkZCA/Px8ymQzZ2dnB5wkhMHbsWKxZswbl5eXQ6/XR+ghERFQPdoUiIqIWMX36dJxzzjm47777UF5ejvfffx/ffvstHnvsMWRkZAAAcnNzQ5IKAJAkCZMmTYLL5cKBAweiUXQiImoARbQLQEREHYMkSXjttdcwYMAA3H777fjpp58waNAg3Hnnnad9blFREQAgKSmpuYtJRERNxK5QRETUoh588EEsWLAAcrkcv/32GwYOHFhvfEVFBXr27IkzzzwTq1evbqFSEhFRY7ErFBERtajqVof09HT06dOn3li/34/JkyejqqoKixYtaoniERFREzGxICKiFlNYWIg5c+agT58+KCwsxFNPPVVv/MyZM/Htt9/izTffRL9+/VqolERE1BRMLIiIqMXMmDEDAPDNN9/gz3/+M+bPn1/ngOx58+bhlVdewRNPPIEbbrihJYtJRERNwMSCiIhaxKeffoovvvgCjz76KDIzM7Fw4UKoVKpaB2+//PLLmDt3Lu655x78/e9/j0JpiYiosTh4m4iImp3FYkGvXr2QnJyM9evXQy6XAwBefPFF3H333fjwww/x5z//GQDwwQcf4LrrrsO1116LpUuXQpKkaBadiIgaiIkFERE1u7vvvhsvvfQSfvnlF5x99tnB7T6fD4MHD0ZRURF27dqFnTt3YuTIkTAajXjyySehVCpDXmf48OHo0qVLSxefiIgagIkFERE1q40bN2LIkCG4/fbba53Zaf369Rg6dChmzJiBAQMGYNq0aXW+1jvvvIOpU6c2Y2mJiKipmFgQEREREVHYOHibiIiIiIjCxsSCiIiIiIjCxsSCiIiIiIjCxsSCiIiIiIjCxsSCiIiIiIjCxsSCiIiIiIjCpoh2AVqa3+/H0aNHERsby9VciYiIiIjqIYSAxWJBeno6ZLL62yQ6XGJx9OhRZGVlRbsYRERERERtRmFhITIzM+uN6XCJRWxsLIBA5RgMhiiXhoiIiIio9TKbzcjKygpeQ9cnqonF6tWr8fTTT2Pjxo04duwYPv30U0yaNKne56xatQqzZs3C9u3bkZWVhYceeghTp05t8HtWd38yGAxMLIiIiIiIGqAhQwiiOnjbZrOhX79+ePnllxsUn5+fj4svvhhjxoz
Bli1bcM8992D69On47rvvmrmkRERERERUn6i2WIwfPx7jx49vcPzixYuRm5uLZ599FgDQs2dP/Pzzz3j++ecxbty4Wp/jcrngcrmC981mc3iFJiIiIiKiGtrUGIt169Zh7NixIdvGjRuHe+65p87nLFiwAPPmzWvmkhERERFRc/P5fPB4PNEuRruiVCohl8sj8lptKrEoKipCampqyLbU1FSYzWY4HA5otdoaz5k9ezZmzZoVvF89AIWIiIiI2gYhBIqKilBVVRXtorRLcXFxSEtLC3sphjaVWDSFWq2GWq2OdjGIiIiIqImqk4qUlBTodDquRRYhQgjY7XaUlJQAADp16hTW67WpxCItLQ3FxcUh24qLi2EwGGptrSAiIiKits3n8wWTisTExGgXp92pvoYuKSlBSkpKWN2iojorVGMNGzYMy5cvD9n2ww8/YNiwYVEqERERERE1p+oxFTqdLsolab+q6zbc8StRTSysViu2bNmCLVu2AAhMJ7tlyxYUFBQACIyPuPHGG4Pxt912Gw4cOIC//e1v2LVrF1555RV8+OGHuPfee6NRfCIiIiJqIez+1HwiVbdRTSw2bNiAAQMGYMCAAQCAWbNmYcCAAXj44YcBAMeOHQsmGQCQm5uLZcuW4YcffkC/fv3w7LPP4s0336xzqlkiIiIiImoZUR1jMXr0aAgh6nx8yZIltT5n8+bNzVgqIiIiIiJqrDY1xoKIiIiIqL04ePAgJEkKDgtoiCVLliAuLq7ZyhQOJhZERERERBQ2JhZERERERBQ2JhZERERE1DbZbHXfnM6GxzocDYttgm+//RYjRoxAXFwcEhMTcckll2D//v21xq5atQqSJGHZsmU466yzoNFoMHToUGzbtq1G7HfffYeePXsiJiYGF110EY4dOxZ8bP369bjggguQlJQEo9GIUaNGYdOmTU0qf2MwsSAiIiKitikmpu7bFVeExqak1B07fnxobE5O7XFNYLPZMGvWLGzYsAHLly+HTCbDn/70J/j9/jqfc//99+PZZ5/F+vXrkZycjIkTJ4asMWG32/HMM89g6dKlWL16NQoKCnDfffcFH7dYLJgyZQp+/vln/PLLL+jevTsmTJgAi8XSpM/QUG1q5W0iIiIiorbkilMSnLfffhvJycnYsWMHYupIVubMmYMLLrgAAPDuu+8iMzMTn376Ka666ioAgYXsFi9ejK5duwIAZsyYgUceeST4/PPOOy/k9V5//XXExcXhxx9/xCWXXBKxz3YqJhZERERE1DZZrXU/JpeH3i8pqTtWdkonnoMHm1ykU+3duxcPP/wwfv31V5SVlQVbKgoKCtCrV69anzNs2LDg/xMSEnDmmWdi586dwW06nS6YVABAp06dUHLS5ysuLsZDDz2EVatWoaSkBD6fD3a7PWR9uObAxIKIiIiI2ia9PvqxpzFx4kRkZ2fjjTfeQHp6Ovx+P/r06QO3293k11QqlSH3JUkKWRtuypQpKC8vxwsvvIDs7Gyo1WoMGzYsrPdsCCYWRERERETNoLy8HLt378Ybb7yBkSNHAgB+/vnn0z7vl19+QefOnQEAlZWV2LNnD3r27Nng912zZg1eeeUVTJgwAQBQWFiIsrKyJnyCxmFiQURERETUDOLj45GYmIjXX38dnTp1QkFBAR544IHTPu+RRx5BYmIiUlNT8Y9//ANJSUmYNGlSg9+3e/fuWLp0KQYNGgSz2Yz7778fWq02jE/SMJwVioiIiIioGchkMrz//vvYuHEj+vTpg3vvvRdPP/30aZ/3xBNP4O6770ZeXh6Kiorw5ZdfQqVSNfh933rrLVRWVmLgwIG44YYbcNdddyElJSWcj9Igkji5Q1YHYDabYTQaYTKZYDAYol0cIiIiIqqH0+lEfn4+cnNzodFool2cZrVq1SqMGTMGlZWViIuLa7H3ra+OG3PtzBYLIiIiIiIKGxMLIiIiIiIKGwdvExERERG1AqNHj0ZbHqXAFosoEELA5vJ
GuxhERERERBHDxCIKHB4fdh4zo9jsjHZRiIiIiNqEtvxLfmsXqbplYhEFfgFYnB7sLrKg0ta8KyASERERtWXVq0zb7fYol6T9qq7bU1f0biyOsYgih9uH3UUW9M4wIFYT3h+SiIiIqD2Sy+WIi4tDSUkJAECn00GSpCiXqn0QQsBut6OkpARxcXGQy+VhvR4TiyhKilGjzObCnmILeqcboVGG98ckIiIiao/S0tIAIJhcUGTFxcUF6zgcTCyiSJKA1FgNikwO7JFb0LOTAUo5e6cRERERnUySJHTq1AkpKSnweDzRLk67olQqw26pqMbEIsrkMgmpBg2OVjmhksvQPTUWchmb94iIiIhOJZfLI3YRTJHHn8dbAYVchqQYFQ5V2HGwzMZZD4iIiIiozWFi0UqoFXLEaZU4UGbDkSpHtItDRERERNQoTCxaEZ1KAa1Sjr3FFpRYuMYFEREREbUdTCxaGaNWCQkS9hRZUGXnGhdERERE1DYwsWiFEmPUcHr82FVkgc3ljXZxiIiIiIhOi4lFK5USq4bJHlid2+nxRbs4RERERET1YmLRSklSYBraEosT+0qs8Pr80S4SEREREVGdmFi0YnKZhJRYDY5U2nGg1Aa/n9PQEhEREVHrxMSilVPKZUjQq5FfZkVBBde4ICIiIqLWKazEwu12Y/fu3fB6OcC4OWmUchi1KuwrseGYidPQEhEREVHr06TEwm6346abboJOp0Pv3r1RUFAAAJg5cyaeeOKJiBaQAvRqBTRKOfYUW1BmdUW7OEREREREIZqUWMyePRtbt27FqlWroNFogtvHjh2LDz74IGKFo1BGrRJCALuLLDA5PNEuDhERERFRUJMSi88++wwvvfQSRowYAUmSgtt79+6N/fv3R6xwVFNSjBoOtw+7i8ywu9kFjYiIiIhahyYlFqWlpUhJSamx3WazhSQa1DySY9WosLmxp8gCl5drXBARERFR9DUpsRg0aBCWLVsWvF+dTLz55psYNmxYZEpGdZJJEtIMWhSZndjPNS6IiIiIqBVQNOVJjz/+OMaPH48dO3bA6/XihRdewI4dO7B27Vr8+OOPkS4j1aJ6jYvCSjtUchm6JMdAJmNrERERERFFR5NaLEaMGIEtW7bA6/Wib9+++P7775GSkoJ169YhLy8v0mWkOijlMsRr1ThQZsPhSnu0i0NEREREHViTWiwAoGvXrnjjjTciWRZqAq1KDp9fiT0lVqgUcqQZNad/EhERERFRhDUpsahet6IunTt3blJhqGliNAp4/H7sLjZDpZAhQa+KdpGIiIiIqINpUmKRk5NT7+xPPh9nKmpp8ToVSixO7C4yo3eGEQaNMtpFIiIiIqIOpEmJxebNm0PuezwebN68Gc899xzmz58fkYJR4yXHqFFkdmJPkQW9043QquTRLhIRERERdRBNSiz69etXY9ugQYOQnp6Op59+GpdffnnYBaPGkyQJqQYNjpmc2FNsQc9OBqgUTRqfT0RERETUKBG96jzzzDOxfv36SL4kNZJMkpAaq8YxkxP7Sy3w+UW0i0REREREHUCTWizMZnPIfSEEjh07hrlz56J79+4RKRg1nUIuQ3KMGofKHVDJ5eiSrOeK6ERERETUrJqUWMTFxdW4UBVCICsrC++//35ECkbhUSlkiNcpcaDMCpVChqwEXbSLRERERETtWJMSi5UrV4bcl8lkSE5ORrdu3aBQNHlpDIownUoBr09gb4kFaoUMKQaucUFEREREzaNJYyxGjRoVchs5ciR69OjR5KTi5ZdfRk5ODjQaDYYMGYLffvutztglS5ZAkqSQm0bDC+a6GLRKyCUZdhdbUGlzR7s4RERERNRONTgT+OKLLxr8opdeemmDYz/44APMmjULixcvxpAhQ7Bw4UKMGzcOu3fvRkpKSq3PMRgM2L17d/A+xw/UL0GvQonZid3FFvTJMCJGzVYlIiIiIoqsBl9hTpo0qUFxkiQ1aoG85557DjfffDOmTZs
GAFi8eDGWLVuGt99+Gw888ECd75GWltag13e5XHC5XMH7pw487yiSY9U4Zj6+gF66ERol17ggIiIioshpcFcov9/foFtjkgq3242NGzdi7NixJwokk2Hs2LFYt25dnc+zWq3Izs5GVlYWLrvsMmzfvr3O2AULFsBoNAZvWVlZDS5feyJJElJjNSg1u7Cn2AKPzx/tIhERERFROxLV1dPKysrg8/mQmpoasj01NRVFRUW1PufMM8/E22+/jc8//xz/+te/4Pf7MXz4cBw+fLjW+NmzZ8NkMgVvhYWFEf8cbYVcJiHFoMHRKgf2l1jh5xoXRERERBQhTe5sb7PZ8OOPP6KgoABud+ig4LvuuivsgtVl2LBhGDZsWPD+8OHD0bNnT7z22mt49NFHa8Sr1Wqo1epmK09bo5TLkKhX42C5DWqFDDlJXOOCiIiIiMLXpMRi8+bNmDBhAux2O2w2GxISElBWVgadToeUlJQGJxZJSUmQy+UoLi4O2V5cXNzgMRRKpRIDBgzAvn37Gv05OiqNUo54nQr7S21QKeXIiNNGu0hERERE1MY1qSvUvffei4kTJ6KyshJarRa//PILDh06hLy8PDzzzDMNfh2VSoW8vDwsX748uM3v92P58uUhrRL18fl8+OOPP9CpU6dGf46OTKdSQKOUY2+RBaUW1+mfQERERERUjyYlFlu2bMFf//pXyGQyyOVyuFwuZGVl4amnnsKDDz7YqNeaNWsW3njjDbz77rvYuXMnbr/9dthstuAsUTfeeCNmz54djH/kkUfw/fff48CBA9i0aROuv/56HDp0CNOnT2/KR+nQjFolAGB3sQUmuyfKpSEiIiKitqxJXaGUSiVkskBOkpKSgoKCAvTs2RNGo7HRg6OvvvpqlJaW4uGHH0ZRURH69++Pb7/9Njigu6CgIPheAFBZWYmbb74ZRUVFiI+PR15eHtauXYtevXo15aN0eIkxahSbndhZZEbfDCP0XOOCiIiIiJpAEkI0emqgCy+8EFOnTsV1112Hm2++Gb///jvuuusuLF26FJWVlfj111+bo6wRYTabYTQaYTKZYDAYolIGq8uLHUdN0CjkrWLgtBACR00OpBo06JVugFrBNS6IiIiIqHHXzk3qCvX4448HxzTMnz8f8fHxuP3221FaWorXX3+9KS/ZYTjcXqjkMuQk6nFmp1ikGtRQK6I6629gwUGDFsVmJ/YWW+HlGhdERERE1EhNarFoy6LZYuHy+PDKqv14Z20+zA4vDFoFpg3Pxa2juuBwhQMub3Qv6D0+P0osTnRJikG3lBjIZNFvTSEiIiKi6Gn2FovHHnsM+fn5TSpcR+Vwe/HKqv14YflemB1eAIDZ4cULy/fitR8PIE6njHIJQ9e4KKy0R7s4RERERNSGNCmx+Oijj9CtWzcMHz4cr7zyCsrKyiJdrnZHLpPhnbW1J2PvrM1HnE4JeStoIdAo5YjVKLG32IpjJke0i0NEREREbUSTEoutW7fi999/x+jRo/HMM88gPT0dF198Md577z3Y7fyluzYWpyfYUnEqs8MLk8MLRStILAAgRq2AWiHDniILyq1c44KIiIiITq/Jo4Z79+6Nxx9/HAcOHMDKlSuRk5ODe+65p8ErZnc0sRolDNrap3I1aBWIUcuxtbCqZQtVjzidCj4/sKvIArOTa1wQERERUf0iMh2RXq+HVquFSqWCx8OL0Nr4/H5MG55b62NThuXgp71luP+T3zH/6x04WtU6uiAlxahgd3uxu8gCh9sX7eIQERERUSvW5MQiPz8f8+fPR+/evTFo0CBs3rwZ8+bNQ1FRUSTL125oVQrcMbor7j6/e7DlwqBV4O7zu+P2UV3x64EKyCTglwMVuPO9TXhnTT7s7tq7TrUUSZKQEqtBudWFPcUWuKM8axURERERtV5Nmm526NChWL9+Pc466yxMnjwZ1157LTIyMpqjfBEX7QXy7G4vFDIZqhxuGLVKmOweVNk9cHn9OFRuw1s
/52Pz8S5RcVolrh+ajbE9U6M6sNvnFygyO9A5QYcz0wytYpA5ERERETW/xlw7Nymx+Mc//oHJkyejV69eqH56a1hBuiGinVgA9a+8LYTA+oOVeOvnAzhqcgIAuiTrcfOILuiTYYxGcQEAbq8fZTYnuibHoGtyTJv5exMRERFR0zX7Ohbz58/HunXr0KdPH2g0Gmg0GvTp0wdvvvlmkwrcEdlcXnj9NXM6SZIwODcBL103EDedkwu9So4DpTbM/vQPPPHtLhSbnVEoLaBSyBCvVeNAqQ2FFZz5i4iIiIhC1T5N0Wk8/PDDeO655zBz5kwMGzYMALBu3Trce++9KCgowCOPPBLRQnZESrkMkwZkYEyPFPzrl0P4fkcR1uwrw2/55fjTgExcOTATWpW8RcukVcnh8yuxt9QKtVKOVIOmRd+fiIiIiFqvJnWFSk5Oxosvvohrr702ZPt//vMfzJw5s1UvmNdaukKtzy+HUauCUt6wRqP8Mhve/OkAfj9iAgAk6FSYMjwbo89MgayFuyVV2tyABPTJMCJBr2rR9yYiIiKiltPsXaE8Hg8GDRpUY3teXh683ujOZNRe5Sbp8dikPnhwfA+kGTSosLvx/P/24v6Pt2LXMXOLliVer4LH58eeIgssXOOCiIiIiNDExOKGG27Aq6++WmP766+/jsmTJ4ddKKqdJEkY1jUJL183EFOG5UCrlGNPsRX3f/I7nvl+N0otLbdKdnKMGmanB7uLLHB6uMYFERERUUfXpK5QM2fOxP/93/8hKysLQ4cOBQD8+uuvKCgowI033gilUhmMfe655yJX2ghoq12halNpc2PpL4fwv53FEAgMsL5yYCb+NCADGmXzj7/wC4FjJic6xWnQM80AlSIi6y0SERERUSvR7NPNjhkzpkFxkiRhxYoVjX35ZtWeEotq+0qseOOnA9hxvEtUUowaU4fn4NzuSc0+LazX50exxYXsRC3OSOUaF0RERETtSbMnFm1Ze0wsgMD6Fz/vK8M7aw8Gu0T1TIvF9JFdcEZqbETeoy4urw9lVje6p8SgS7Kea1wQERERtRPNPnibWh9JkjCyezJenTwQk4d0hlohw84iC/760VY8/789KLc23/gLtUKOeJ0SB8psOFLlaLb3ISIiIqLWi4lFO6NWyHHN2Z3x2vV5GHNmMgBgxa4S3PbvjfhwQyHcXn+zvK9OpYBOKcfeYgtKLNFZxI+IiIiIooeJRTuVGKPGrAvOxDNX9sOZqbFwevxY+ssh3P7vjVizrwzN0QPOoFVCgoTdRRZU2d0Rf30iIiIiar2YWLRzZ6bF4qkrz8JfLzgDiXoVSiwuPPHtLjz46R84UGqN+Pslxqjh8vixq8gCq4trmhARERF1FEwsOgCZJGH0mSlYfH0erj47Cyq5DNuOmnHPB1uwaMVeVEa4dSElVg2T3YPdRWaucUFERETUQTCx6EA0SjmuH5KNVycPxMjuSRAAvt9RjFuXbsR/Nx2GxxeZ8ReSJCHVoEGpxYW9xdaIvS4RERERtV5MLDqgFIMGfxvXA09c3hfdkmPg8PjwztqDuPO9Tfg1vzwi4y/kMgkpsRocqbLjQKkNfn+HmtWYiIiIqMNhYtGB9U434tmr+uHu87ojXqfEMZMTjy3biYe/2I5D5bawX18plyFRr8bBMisKKmzNMmCciIiIiFoHJhYdnEySMLZXKhZfn4crB2ZCIZOwpbAKd72/Ga+s2geTwxPW62uUchi1KuwrseGYidPQEhEREbVXTCwIQGAdiinDc/Dq5DwM65IIvwC+2VaEW/+1AV9sPQJvGOMk9GoFNEo59hRbgquCExEREVH7wsSCQqQZNXhwQk88PqkPcpP0sLl8eOOnfMx8fzM2HKpo8usatUpAALuLLTDZw2sFISIiIqLWh4kF1apvZhyev6o/7hzdDUatEocrHZj35Q7M/XI7CivtTXrNxBg1nG4fdhebYXdzjQsiIiKi9oSJBdVJLpNwUZ80vHZ9Hib
1z4BCJmHjoUrM/M9mvPHTAVidjU8OkmPVqLS7safIApeXa1wQERERtRdMLOi09GoFbhqRi5evG4jBOQnw+QW+2HoUt/xrA5b9cQy+RkwlK5MkpMZqUWR2Yl+JNayxG0RERETUejCxoAZLj9Pin5f0wiOX9kbnBB0sTi8W/7gfd7+/GVsKqxr8OtVrXBRW2JFfxjUuiIiIiNoDJhbUaAM6x+PFawbgtnO7IFatwKEKO/75+TY8tmwHjlY5GvQa1Wtc5JfZmjxmg4iIiIhaDyYW1CRymYSLz0rHazfkYeJZnSCTgF/zK3Dne5vw9pp82FynH3+hUcoRq1Fib4kVRVzjgoiIiKhNY2JBYYnVKHHLuV3x0rUDMbBzPLx+gU83H8Ft/9qI77YXnXb8RYxaAZVcht1FZpRbucYFERERUVvFxIIiIitBh3mX9sacS3ohI06LKocHL63ch1kfbsEfR0z1Pjdep4LPD+wussDs5BoXRERERG0REwuKqEE5CXjp2gGYPiIXepUcB8psePDTP7Dgm50oMtfd3SkpRgWb24vdRRY43JyGloiIiKitYWJBEaeQy3BZ/wy8dsMgjO+TBpkErN1fjjv+vRH/t+5grYvjSVJgpqhyqwt7ii1wezkNLREREVFbwsSCmo1Rq8Qdo7vhhasHoF+mER6fwEcbD+O2f23E/3YWwy9Cx18E1rjQ4JjJgX2llkatj0FERERE0cXEgppdTpIej17WB/+Y0BOdjBpU2j14Yfle/PWjrdhxzBwSq5DLkByjQUG5HfllVgjB5IKIiIioLWBiQS1CkiQM7ZKIl68biGnDc6BVyrGvxIq/f/I7nv5uF0osJ8ZfqBQyxOtUyC+z4XBlw9bFICIiIqLoYmJBLUopl+HygZl47YY8XNArFRKA1XvLcPu/N+Hfvx6C0xMYuK1TKaBXKbC3xILiegZ9ExEREVHrIIkO1tfEbDbDaDTCdPQoDAZDzQC5HNBoTty32ep+MZkM0GobHWt1ebFxRyGMGiUU8lpyO0mC0OpO3HXYgbr+TDViHYCoe+Cz0OmbFut0Av66Z2tqVKxWB0gSACD/SDmWrN6HnccsAIDEGCUmD87GiO5JkCQJFX4FIJPQN8OIeIUAvPUsvKfVBuoZANxuwFPP1LWNidVoAvtFY2M9nkB8XdRqQKFofKzXC7jqWfNDpQKUysbH+nyAs54kTqkMxDc21u8HHPW0PDUmVqEI1AUQOCbs9aza3pjYxhz3LXCOABAobz3HPXS6psU6HIF6rote37RYpzOwX0QiVnfiHAGXq/7jvjGxPEcE8BzR+FieI07gOaLxsW38HGEuLYUxPR0mk6n2a+eTiQ7GZDIJAMIUOMRq3iZMCH2CTld7HCDEqFGhsUlJdccOGhQMszg9wp6eWWes44we4vfCquDNcUaPOmNdmVkhsbazBtQZ60lIDIm1DD2nzlifVhcSazrvwro/GxASW3XxZfXG/rH7SDC24spr643dvmWf+N/2IrFuX5lw3XpbvbEiP//E3+K+++qP3bbtROycOfXH/vbbidinnqo/duXKE7EvvVR/7FdfnYh95536Yz/88ETshx/WH/vOOydiv/qq/tiXXjoRu3Jl/bFPPXUi9rff6o+dM+dE7LZt9cfed9+J2Pz8+mPvuONEbElJ/bFTppyItVrrj73yShGivtgWOEcIIYTIzq47tlev0NheveqOzc4OjR00qO7YpKTQ2FGj6o7V6UJjJ0yov95OduWV9cdarSdip0ypP7ak5ETsHXfUH8tzRODGc0TgxnNE4MZzxIkbzxGB2ynnCBMgAAiTySROh12hqFV7bdV+yGSAyelBlb2eTJyIiIiIoopdoU7FrlC1xzZTVyjJ5QJ8oc2SFVYX3vutED/uKYVDqYZGJccVAzIxLEOP7Hg1eqQZoKyt3tiEGcBuDo2PZTeHE9jNofGxPEcE8BzR+FieI5oWy3NEQCvsCtVxE4uG9BNrJlaXF+vzy2HUqmq/QCY
AwJ5iC9746QB2FQXGXyTHqnFZv3RcOzgLnYxaKOQyWJwexGqU8Pr90KkUUS5x2+JweyGXsQ7DxXqMDNZj+FiHkcF6jAzWY/haSx025tq5VfyFX375ZTz99NMoKipCv379sGjRIgwePLjO+I8++gj//Oc/cfDgQXTv3h1PPvkkJkyY0IIlppZwRmosnrriLPy4pxTvrjuIUosLK3eX4s4x3bB49X4sWXsQZocXBq0C04bn4o7RXaFWyqNd7DbB5fFh8Y8H8M7afNZhGFiPkcF6DB/rMDJYj5HBegxfW63DqLdYfPDBB7jxxhuxePFiDBkyBAsXLsRHH32E3bt3IyUlpUb82rVrce6552LBggW45JJL8N577+HJJ5/Epk2b0KdPn9O+H1ss2ianx4f/bjqM83ulYmthFRat2Fcj5q7zu+FPAzKx/mBFFErYdpydk4BPNx/Gi8tZh+FgPUYG6zF8rMPIYD1GBusxfPXV4d3nd8eto7q0aMtFY66do55YDBkyBGeffTZeeuklAIDf70dWVhZmzpyJBx54oEb81VdfDZvNhq+++iq4bejQoejfvz8WL15cI97lcsF1Uj8ys9mMrKysVpFY+P2AXCZFpQxtkVIuYVBOAs5+/H8wO2r2kTRoFfhl9vkY8eRKVNg40Ls2CXoVfv77GAxdsJx1GAbWY2SwHsPHOowM1mNksB7D15A63PCPC6BStNwP022mK5Tb7cbGjRsxe/bs4DaZTIaxY8di3bp1tT5n3bp1mDVrVsi2cePG4bPPPqs1fsGCBZg3b17EyhwJKrkMqUYNvL4ONbwlbBqFDGanp9YDDQDMDi+q7B6MPiMZBRX1DMLrwDon6FBpYx2Gi/UYGazH8LEOI4P1GBmsx/A1pA4tTg8SY9QtXLKGiWpiUVZWBp/Ph9TU1JDtqamp2LVrV63PKSoqqjW+qKio1vjZs2eHJCLVLRbRpFLI0KuTMaplaKvcXj8MWkWdWXxSjBrPXd2/5QvWhrAOI4P1GBmsx/CxDiOD9RgZrMfwna4OYzXKKJSqYdp9B3+1Wg2DwRByo7bL5/dj2vDcWh+bNjwX3vqmvSMArMNIYT1GBusxfKzDyGA9RgbrMXxtuQ6j2mKRlJQEuVyO4uLikO3FxcVIS0ur9TlpaWmNiqf2RatS4I7RXQGgzc2U0FqwDiOD9RgZrMfwsQ4jg/UYGazH8LXlOmwVg7cHDx6MRYsWAQgM3u7cuTNmzJhR5+Btu92OL7/8Mrht+PDhOOuss2odvH2q1jArFIXP7vZC0Qrmdm7LWIeRwXqMDNZj+FiHkcF6jAzWY/haSx22mcHbADBr1ixMmTIFgwYNwuDBg7Fw4ULYbDZMmzYNAHDjjTciIyMDCxYsAADcfffdGDVqFJ599llcfPHFeP/997Fhwwa8/vrrDXq/6jzKbDY3zweiFuMFoATgtAdm/TLXs8gr1Y51GBmsx8hgPYaPdRgZrMfIYD2GrzXUYfU1c4PaIkQrsGjRItG5c2ehUqnE4MGDxS+//BJ8bNSoUWLKlCkh8R9++KE444wzhEqlEr179xbLli1r8HsVFhYKALzxxhtvvPHGG2+88cZbA2+FhYWnvc6Oeleolub3+3H06FHExsZCklp2DYnqGakKCwvZDYuajPsRRQL3IwoX9yGKBO5HrZ8QAhaLBenp6ZDJ6p/3KepdoVqaTCZDZmZmVMvA2akoErgfUSRwP6JwcR+iSOB+1LoZjcYGxbX76WaJiIiIiKj5MbEgIiIiIqKwMbFoQWq1GnPmzIFa3TqXYae2gfsRRQL3IwoX9yGKBO5H7UuHG7xNRERERESRxxYLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiJ
qsuuvvx4ajQZ79uyp8dgTTzwBSZLw1VdfAQBycnIgSVKN22233VbjuVVVVbjllluQnJwMvV6PMWPGYNOmTbWW4YsvvsDAgQOh0WjQuXNnzJkzB16vN7IflIiITksSQohoF4KIiNqmkpIS9OjRA/3798eKFSuC2/Pz89G7d29MmDABH3/8MYBAYhEfH4//b+/Ow6Mqz/6Bf88yayaZ7AmQAFEWWWQXCC6Ar0rRaqlalVpFqtK6oEixSqtStIJbVSpUXKr42nohvP1JrbRYi4BVQEnYQZAlIcEsZJ99Pef3xySTDNkzk0yW7+e65tLM3GfmmcOZc8597ud5zq9+9auQ9xg2bBgmT54c/FtRFFx++eU4cOAAHn30USQnJ+NPf/oTCgsLkZubi6FDhwZj//Wvf+G6667DjBkzMHfuXBw6dAhr1qzBggUL8Prrr3fytyciooaYWBARUVjeeustLFiwAOvWrcO8efMAALNnz8bOnTtx9OhRDBgwAEAgsRg9enSwgtGcDRs24NZbb8XGjRtx8803AwDKysowbNgwzJ49Gx988EEwdtSoUdBoNMjJyYEsywCAJ554AitWrMDRo0dx0UUXdcZXJiKiJrArFBERheWee+7BpZdeiiVLlqCiogLr16/Hli1b8Pvf/z6YVDTk8Xhgt9ubfb//+7//Q1paGm688cbgcykpKbjlllvw97//HW63GwBw9OhRHD16FAsWLAgmFQBw//33Q1XVYKWEiIi6BhMLIiIKiyAIeOONN1BTU4P77rsPjzzyCCZNmoQHHnigUeznn38Oo9EIk8mEwYMHY9WqVY1i9u3bhwkTJkAUQw9RkydPhsPhCI7n2LdvHwBg0qRJIXH9+/dHRkZG8HUiIuoacushRERELRs1ahSWLFmClStXQpIkbN68uVFiMGbMGFx22WUYPnw4KioqsG7dOixatAhFRUV4/vnng3HFxcW44oorGn1Gv379AABFRUW4+OKLUVxcHPL8+bFFRUWR/IpERNQKJhZERBQRycnJAAIVg9GjRzd6/eOPPw75e/78+Zg9ezZefvllLFy4EBkZGQAAp9MJnU7XaHm9Xh98veF/m4u1WCxhfBsiImovdoUiIqKwFRYWYtmyZRg9ejQKCwvxwgsvtLqMIAh45JFH4PP5sH379uDzBoMhOI6iIZfLFXy94X+bi617nYiIugYTCyIiCtuDDz4IIDD9609+8hM8++yzOH36dKvLZWZmAgAqKyuDz/Xr1y/Yzamhuuf69+8fjGv4/PmxdXFERNQ1mFgQEVFYPvroI3z88cd45plnkJGRgVdffRVarbbJwdvnq0s+UlJSgs+NGzcOe/fuhaIoIbFff/01jEYjhg0bFowDgJycnJC4oqIinD17Nvg6ERF1DSYWRETUYVarFQ899BDGjx+PhQsXAghUFJ555hls2bIFGzduBBCoSPj9/pBlvV4vnnvuOWi1WsycOTP4/M0334zS0lL8v//3/4LPlZeXY+PGjbj++uuDYypGjRqFiy66CG+++WbIe7/++usQBCF4DwwiIuoavEEeERF12MMPP4zVq1dj9+7duOSSS4LP+/1+TJ48GSUlJTh27Bj+9re/4fe//z1uvvlmZGVlobKyEh988AEOHz6MFStWYOnSpSHLXnbZZTh8+HDInbcLCgqwZ88eDB8+PBj7ySef4IYbbsDMmTNx22234fDhw1i9ejXuvvtuvPnmm126LoiI+jomFkRE1CG5ubmYMmUK7rvvPrz22muNXt+zZw+mTp2KBx98EHfeeSeWL1+OvXv3oqysDFqtFuPGjcNDDz2En/zkJ42WraqqwqOPPopNmzbB6XTikksuwUsvvdTonhUAsGnTJixfvhzffvstUlJScNddd+Gpp56CRqPplO9NRERNY2JBRERERERh4xgLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKmxztBnQ1RVFQVFS
E2NhYCIIQ7eYQEREREXVbqqrCarWif//+EMWWaxJ9LrEoKipCZmZmtJtBRERERNRjFBYWIiMjo8WYPpdYxMbGAgisnLi4uCi3hoiIiIio+7JYLMjMzAyeQ7ckqonFF198gRdffBG5ubkoLi7GRx99hDlz5rS4zPbt27F48WIcOXIEmZmZeOKJJ3DXXXe1+TPruj/FxcUxsSAiIiIiaoO2DCGI6uBtu92OsWPHYs2aNW2Kz8vLw3XXXYeZM2di//79WLRoEe655x58+umnndxSIiIiIiJqSVQrFrNnz8bs2bPbHL927VpkZWXhD3/4AwBgxIgR+PLLL/HKK69g1qxZndVMIiIiIiJqRY8aY7Fr1y5cddVVIc/NmjULixYtanYZt9sNt9sd/NtisXRW89rMr6gotbigqGq0m9LjyaKIWL0Mo1biLF9E1Kepqgq7xw+rywu/wuNLuARBQIxWgkknQ5Y4Oz9RW/SoxKKkpARpaWkhz6WlpcFiscDpdMJgMDRaZuXKlVi+fHlXNbFNnF4/viu1wuXx82Q4XIIKnSTBpJeREqtDrF6DWL0MDQ8CRNQHeHwKrC4vrC4vymwe2FxeeHwKAB5bwqUKKmRBRIxWRpJJi3ijBrF6DQxaKdpN67NUVYXP54Pf7492U3oVSZIgy3JEzkl7VGLREUuXLsXixYuDf9eNbI82VVWRGqfnCXAEuLx+2N1+lNsskAQRRp2EpBgt4o1aVjOIqFdpWJWocnhQaffC6fFBUVUYZBmxeg10Mk98I8XnV2D3+HGm0o68CkCvEWHWa5Fk0iJOr4FJL0MSeXzpCh6PB8XFxXA4HNFuSq9kNBrRr18/aLXasN6nRyUW6enpKC0tDXmutLQUcXFxTVYrAECn00Gn03VF8yhK9BoJeo0EQAu/osLh8aGwyokzlXboZAmxehkpJj1i9TJMrGYQUQ/j9SuwunywOD0ot3lgc/ng9vkhiYGr6SkmPU9uO4ksiTAbRJgNGqiqCqfXj2qHByUWJ2RJhKm2mmE2ahCrYzWjsyiKgry8PEiShP79+0Or1fKCYYSoqgqPx4OysjLk5eVh6NChrd4EryU9KrHIzs7GP//5z5DnPvvsM2RnZ0epRdTdSKJQ2x0qcBBw+xRYnT6UWy2QRAFGrYTEGC0SYgLVDIOG1Qwi6l5UVYXD44fV5UOlw40quxcOtx8KFOhlGTE6GUkmXjDraoIgwKiVYdQGTp28fgUOjx/5FXYoZSr0WgnxhkA1I1bHakYkeTweKIqCzMxMGI3GaDen1zEYDNBoNDhz5gw8Hg/0en2H3yuqiYXNZsPJkyeDf+fl5WH//v1ITEzEwIEDsXTpUnz//ff43//9XwDAL3/5S6xevRq//vWv8fOf/xyff/45NmzYgM2bN0frK1A3JghCg2pGYNC83R2oZuRXOKDXiIgzNKhmcIAeEUVJXVXC6vKizOoOVCX8/kD3Tq2ElFgdT1K7GU0T1YxKuwfFFidkUYRJJyMpJlDNiNNrgsci6rhwrqRTyyK1bqOaWOTk5GDmzJnBv+vGQsybNw/r1q1DcXExCgoKgq9nZWVh8+bNeOSRR7Bq1SpkZGTg7bff5lSz1CaSKCDOoEGcob6aYXH4cM5igVxbzQgM0Ksbm9GjCnpE1IM0rEpUOzyosHvgcPvhVxUYNLVVCQ2rEj1Fc9WMvHI7VFWFQSvDbNAEqhl6DUw6VjOodxJUtW/NeWqxWGA2m1FTUxO1O2/b3D7syauA2aBlf/9uwld7EHB4/PCrKgwaEXEGDZJNOlYziCgivH4FNpcPVpcPZTYXrLVjJURBRIxWglHLk83eSFHV4CQjHsUPjRQYG5Ns0sJsCFzIYjWjZS6XC3l5ecjKygqrmw41r6V13J5zZ16SJUJggF6cQQxWM1xeBTUOL85Z3ZAEgdMNElG71XWPsbp8qLJ7UGn3BC5eKCp0sogYnYxEIweh9nZiM9WM02V2AHboNRLMBg2SY3Uw6WTE6mSITDD
7jPz8fGRlZWHfvn0YN25cm5ZZt24dFi1ahOrq6k5tW0cwsSA6jyAIMGilYPJQN91gQaUDeRVqYGyGPlDN4HSDRNSQz6/A5q6vSlicPri8SmDyCI2EZBPHSvR1Dcdm1FUzKmwelNS4IMtC7UxfOsQZNKxmUI/DxIKoFU1NN1jj8KLU4oJcO91jciynGyTqqxweX/1YCVugKuFTVOhlEUatjESjyKoENampaobd7cPJMhsAwKiVAheyYmu75WpZzaDujZ3GidqhboBekkmHAfFGJMZo4VdV5FfYsfdMFb7Jr8CBwmoUVjpQ4/DCr/SpIUxEfYLPr6Da4UFhpQP7CqqwJ68S+wqqUVDhhKoCSTFaDIg3IMmkg4E36KR20Egi4o1a9DcbkB6nh0YUUW5z4/DZGuTkV+Kb/EqcOmdDuc0Nt493nwYA2O3NP1yutsc6nW2L7YAtW7bgsssuQ3x8PJKSkvDDH/4Qp06dajJ2+/btEAQBmzdvxpgxY6DX6zF16lQcPny4Ueynn36KESNGwGQy4Qc/+AGKi4uDr+3ZswdXX301kpOTYTabMX36dOzdu7dD7W8PJhZEYQiUtDVIjzOgv9kAvSyh0u7BkaIa7DlTiT35lThRasU5qwsuLw8CRD2V0+PHOYsL35VYsSevCjn5VThabEGNwwuDRkZ/sx7pZj3iDBpO9EARIQoCYnQyUmP16B9vgFmvhden4GSZLXAhK68Sh76vRlG1ExaXF0pfvZBlMjX/uOmm0NjU1OZjZ88OjR08uOm4DrDb7Vi8eDFycnKwdetWiKKIH//4x1AUpdllHn30UfzhD3/Anj17kJKSguuvvx5erzf4usPhwEsvvYT3338fX3zxBQoKCrBkyZLg61arFfPmzcOXX36J3bt3Y+jQobj22mthtVo79B3ail2hiCKktZsncbpBop7Dr6iwuXywuLwot7lhcXkDYyVq74+TFKNlAkFdSiuL0MpaxCMw01Qg2XWjqNoFjSTApAuMzYitHZuhk9ktt7u46bwE55133kFKSgqOHj0KUzPJyrJly3D11VcDAN577z1kZGTgo48+wi233AIA8Hq9WLt2LS688EIAwIMPPoinn346uPyVV14Z8n5vvvkm4uPjsWPHDvzwhz+M2Hc7HxMLok7SlpsncbpBou7D6fHD6vaixuFFudUDu8cHn6JALwcmc0gwcAYn6h7qqhkxusBpnMenwO7x4btSGwQhMDYjwahFQkzg+BLTm8dm2GzNvyadd1w9d6752PNvEJef3+Emne/EiRN46qmn8PXXX6O8vDxYqSgoKMDIkSObXCY7Ozv4/4mJiRg+fDi+/fbb4HNGozGYVABAv379cK7B9ystLcUTTzyB7du349y5c/D7/XA4HCH3h+sMTCyIukBz1YzTZXaoqq1RNYPTDRJ1voZViUq7B9VOD5wePyRRgEEjIzGG9xqinqGumpFgDGzXTq8fJTUunK12QisFkpC6maZMva2aERMT/dhWXH/99Rg0aBDeeust9O/fH4qiYPTo0fB4PB1+T41GE/K3IAhoeGu6efPmoaKiAqtWrcKgQYOg0+mQnZ0d1me2BRMLoihobrrB4honNDJvnkTUWVxePyyuQFWiwu6Bze2Dz69AJ0uBq7ysSlAPJ4mBblGm2mqG2xe4+et3pTaIImDQBKoZiSZtMI7bfOepqKjA8ePH8dZbb+Hyyy8HAHz55ZetLrd7924MHDgQAFBVVYXvvvsOI0aMaPPnfvXVV/jTn/6Ea6+9FgBQWFiI8vLyDnyD9mFiQRRlzU03WHfzJAOnGyTqML+i1t5XwosKmwc1Ti+cHh/EuqqEkVUJ6t10sgSdLDWuZlQ5oJVFmPQapNRVy/UaaGX+HiIpISEBSUlJePPNN9GvXz8UFBTg8ccfb3W5p59+GklJSUhLS8Nvf/tbJCcnY86cOW3+3KFDh+L999/HpEmTYLFY8Oijj8JgMITxTdqGiQVRN1M33WA86gfo1d08SSM
HkpAUkw5mY2AAOKsZRKFctXe7rnF6UG6rrUooCnRSoCoRbzDwCi31SU1WM9x+HLdbIQoCDFoZiTEaJBgDiUYMp0sOmyiKWL9+PR566CGMHj0aw4cPxx//+EfMmDGjxeWee+45PPzwwzhx4gTGjRuHf/zjH9BqtW3+3D//+c9YsGABJkyYgMzMTKxYsSJk1qjOIqgNO2T1ARaLBWazGTU1NYiLi4tKG2xuH/bkVcBs4JUyah+PT4HD44Ojdupao1ZCvFGDpBgdTKxmUB+lKCqsbh9sbh/Kre5gVaJ+bJPEfS1RK/xK4EKW3eODXwl0DzTpA91y42q75Ubrd+RyuZCXl4esrCzo9fqotKGrbN++HTNnzkRVVRXi4+O77HNbWsftOXdmxYKoB2luusHvq1zQyoErUUkxgWoGpxuk3uz8qoTd44PXp0ArSYjRSTAbDBB5pZWozSRRCFyg0gdODV1eP+xuPyrsVogQYdBJSIzRINEY6JZrZDWDmsDEgqiHam66wZNlNggIVDPMtdWMXj/dIPV6iqLC5vHB6gpUJSwuL5yeQOXOoAkMumZVgihy9BqptqutNljNKKpyobDSAZ0UqGakxOpqx2ZEr5pB3QsTC6JeIjjdIALVDEeDmyf1+ukGqVdyef2wuX2ocXhRZnPD4fHB61OhkUQYtRLi4jSsShB1geaqGeU2CyRBhFEnITFGWzs2g9WMcMyYMQM9eZQCEwuiXkgUmp9usOHNk+qmG+SVJuouPH4FNpcPFXY3qh1eOGqrEkaNBLNeyxlriLqB86sZDo8PZ6ucKKi0QydLiNXLSDHpEauXYdCGfxHL7fVDVVUoSuDRFwkCekSyxsSCqA9obrrBwioHdLLILlLUbfgVFR6fCo0kIEYrI51VCaJuTRKF4FS1qqrC7VNgdfpQbrVAEgVo5PB/v4LiRaxPgcPjhV/sm6eueo0EjdR5+8JIVUn65r8OUR92/nSDHp8Cfx+9AkTdjyQKrEoQ9VCCIDSoZtRdKFDCf2NVBDyAx+WCwWAM//16GJ+iAJ18mHY4HAAa39G7vZhYEPVxPIkjIqLOIIlCRLpCARL83hiUl5cBAqA3GCCg71QyfYoK0e+DvxOO16qqwuFw4Ny5c4iPj4ckhffvxcSCiIiIiLo1MSYBih04d66sD6UUAX5VhVYSIXVit+X4+Hikp6eH/T5MLIiIiIioWxMEAZIpEaoSDyi+aDenS1VaXRieGoeUWF2nvL9Gowm7UlGHiQURERER9QiCKAKiNtrN6FJ+0Q+NVtcj7jrOztVERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBQ2JhZERERERBS2sBILj8eD48ePw+fzRao9RERERETUA3UosXA4HLj77rthNBoxatQoFBQUAAAWLlyI5557LqINJCIiIiKi7q9DicXSpUtx4MABbN++HXq9Pvj8VVddhQ8//DBijevNYnQyZFGIdjN6NEkUoJNFSFyPREQAuF+MFK5Hoo6RO7LQpk2b8OGHH2Lq1KkQhPof3ahRo3Dq1KmINa43cnp80EoiBifFwGzUoMbhRbXDC7dPiXbTegydLCI+RoN4gwY1Ti/MBg2quR6JqA/jfjEyuB6JwtOhxKKsrAypqamNnrfb7SGJBoVye/1Yu+M03t2ZB4vThziDjPnTsvCL6RfgbKWTO6020MkiMhINWLvjFNbtzOd6JKI+j/vFyOB6JApfh7pCTZo0CZs3bw7+XZdMvP3228jOzo5My3oZp8eHP20/hVVbT8DiDAx2tzh9WLX1BN7YcRrxRk2UW9gzxMdosHbHKfxx60muRyIicL8YKVyPROHrUMVixYoVmD17No4ePQqfz4dVq1bh6NGj2LlzJ3bs2BHpNvYKkiji3Z15Tb727s483DfjQqzaegJ2t7+LW9ZzxOgkPPnDkVi3M7/J19/
dmYcHZl6IcpsHfkXt2sYREUWBJAqIN2ha3C/y+NI6Hl+IIqNDicVll12G/fv347nnnsPFF1+Mf//735gwYQJ27dqFiy++ONJt7BWsLm/wCsj5LE4fym1u5ORX4XiptYtb1nMMT4tFmdXd4nqsdnghi4Cf1Woi6uUUVUVJlRMJRg2PL2Fqy/GlyuGFwqSCqEUdSiwA4MILL8Rbb70Vybb0arF6DeIMcpM7rTiDjGSTDlMuSMDoAXFRaF3PEKOTkBKra3E9mvQybntzNy5IicGkQYkYlxmPGF2HN3Miom7F4vRib0EVcguqsPdMFWRJxJePzeTxJUxtOb7E6mXc8NqX6B9vwKTBCZg4MBH94/UcW0rUQIfOuOruW9GcgQMHdqgxvZlfUTB/WhZWbT3R6LX507JgcXrxk4lcb62xuLzNrse7sgdj58lynC6343S5Hf/59hxEARjRLw4TByVg0qBEDE4y8iBARD2Goqo4dc6G3IIq5ORX4cQ5KxpeNDdqJRz+vgZ3TRuMP2492Wh5Hl/arrXjyzd5lSi1ulFqdWNfYTXeQh76mfWYODABEwcn4OIBZuhkKQotJ+o+BFVV213XE0WxxZMzv7/79uO0WCwwm82oqalBXFzXXr1xe/340/ZTnBUqDHWzdrzRzOxa+WV25BZUIye/ErkFVThb5QxZPilGi4mDEjBxUALGZcbDqGU1g4i6F5vLh32FgURib0EVqp3ekNcHJxkxcVAiJg5KwIj0WMTo5Bb3izy+tE1rx5fCCgdOldmRc6YSuWeqcKTIAl+DLE8ribg4w4yJAxMwaXAC+pkNUfw21JsU1TgxNiMe6WZ968GdoD3nzh1KLA4cOBDyt9frxb59+/Dyyy/j2WefxY033tjet+wy0UwsAMDh8UEWRVQ7PTAbeB+LjtDJIuKNGsQbNahx+mA2yM3OM15icSH3TBVy8itx8PsaeBq8LokCRvaLw6TaRGNgIqsZRNT1VFXF6XI7cs5UIfdMFY6XWEKqEgaNhHGZ8cGLIskmXaP3aM9+kZrXnvXo8Phw8GxN8N+t3OYOeb2/WY9JgxMxcWACRg8wQyt3aCJOot6fWDRn8+bNePHFF7F9+/ZIvWXERTuxAACb24ejRTXQyxJPZMMgiQJkUYBPUds0S4fHp+Dw9zW1XQoqUVTjCnk92aSr7TKVgLEZ8TBoWdImos5hc/uwv7AaubVXv6scoVWJgYnG4P5oRL84aKS2nZS2d79ITWvvelRVFQWVjsCFrDNVOFpsCVlOJ4u4eIA5kGgMSkB6XHROEKln6rOJxcmTJzF27FjY7fZIvWXEdZfEYk9eBcwGbZsPFhR5RdVO5J4JDII8dLYGngZTScmigFH968dmZCQYmAQSUYepqor8CkewG823xaFVCb1GxNiM+qpEaixPPHsyh6cucQwkGpV2T8jrGQmG2i5TiRjVv+2JI/VNvT6xsFgsIX+rqori4mL87ne/w7Fjx7B///72vmWXYWJBTXH7/Dj0fQ1y8wMHgRJLaDUjNba+mjEmIx56DasZRNSyhieXuWeqUNHEyWWgKyZPLnuzdiWVAxOQymoGnafXJxZNDd5WVRWZmZlYv359t777NhMLao2qqiiqdiG3oBI5+VU4XFQDr7/+ZyKLAi4eYA5WMzjdIBEBrXeH0coixmaYgwOv2R2mb7K5fThQWB1MNCLVDY56r16fWJx/d21RFJGSkoIhQ4ZAlts/y86aNWvw4osvoqSkBGPHjsVrr72GyZMnNxm7bt06zJ8/P+Q5nU4Hl8vVZPz5mFhQe7m8fhw8Wz8245w1dIBeepw+OAB89AAzqxlEfYjT48eBs9WtD+AdlIDR/TmAl0Ipqoq8cnswGe3IwH3q/Xp9YhFJH374Ie68806sXbsWU6ZMwauvvoqNGzfi+PHjSE1NbRS/bt06PPzwwzh+/HjwOUEQkJaW1qbPY2JB4VBVFWer6sdmHP6+ptF0g6MHmIOJRv94TjdI1JvU7QPaMuU
o9wHUXlaXF/sLA4nq3jOtTzUs8xyiT+iVicXHH3/c5gbccMMNbY6dMmUKLrnkEqxevRoAoCgKMjMzsXDhQjz++OON4tetW4dFixahurq6zZ/REBMLiiSnx4+D31cjp3ZsRlNXK+u6THG6QaKeKVC1rK9KNFu15E3SKIIUVcXp2vtm5ORX4btSKxqesBm1gWrGpEEJmDAwAUmsZvRaPSmxaHO/pTlz5rQpThCENt8gz+PxIDc3F0uXLg0+J4oirrrqKuzatavZ5Ww2GwYNGgRFUTBhwgSsWLECo0aNajLW7XbD7a4/CJw/8JwoHAathClZSZiSlRTSvzr3TBWOFFtQVONC0cFi/ONgMbSyiDF11YzBiexfTdRNqaqK72tnjcs507gyqZEEjO5vxqTBCZg4kOOsqHOIgoAhqSYMSTXhtksGosbpxb6CQLV875kqWFw+7DxVgZ2nKgAAFyTHBLtMXZQeB0nkNkldr82JhaJE/gY75eXl8Pv9jboxpaWl4dixY00uM3z4cLzzzjsYM2YMampq8NJLL2HatGk4cuQIMjIyGsWvXLkSy5cvj3jbic4nCAIGJcVgUFIMbpyQAYenboBe/YwwObUnKvjiNAbEG0LGZrB6RRQ9Lq8fh7+vv9lZUzPD1d3sbEwGx1JR1zMbNJgxPBUzhqfCr6g4VWZDTn4lcguqcKLUhtPldpwut2Nj7lnE6CSMz0wIzjSVEKONdvOpj4jqGIuioiIMGDAAO3fuDJlJ6te//jV27NiBr7/+utX38Hq9GDFiBObOnYtnnnmm0etNVSwyMzPZFYq6VN10g4EroJVNTjc4ZkB87RVQTjdI1BWKqp3BROLw943vZTN6QO1YicEJyIjnvWyo+6pxerG3oAo5+VXYV1AFq9sX8vqFKTGYVDs2Y1haLKsZPUyv7Ap1Prvdjh07dqCgoAAeT+jc3A899FCb3iM5ORmSJKG0tDTk+dLSUqSnp7fpPTQaDcaPH4+TJ082+bpOp4NOx36HFF2CICArOQZZyTG4eWJGcLrBum5TlQ4PvsmvxDf5lQCAzERj7c2TEjCS0w0SRYTHp9RWJSqRc6YKxTWhVYmUWF3wdzdmQDwMWlYlqGcwGzSYOTwVM2urGSdKrcgpqEJufhVOltlwqsyOU2V2fJhTiFidjPED4zFxUCImDIxHvJHVDIqcDiUW+/btw7XXXguHwwG73Y7ExESUl5fDaDQiNTW1zYmFVqvFxIkTsXXr1uAYDkVRsHXrVjz44INteg+/349Dhw7h2muv7chXIYoKk07GpUOScemQZKi10w3WXTk9VmJBYaUDhZUObNr/PQwaCWMzzZg4MHC1KSWWiTJRW5VYXMjNDyQSB7+vgcdXX5WQRAGj+sUF+6UPTDSyKkE9niQKuKhfHC7qF4efTRmEKocHe2tnMtxbW8344kQ5vjhRDgHAkFRT8EaNQ1JNrGZQWDrUFWrGjBkYNmwY1q5dC7PZjAMHDkCj0eBnP/sZHn74Ydx4441tfq8PP/wQ8+bNwxtvvIHJkyfj1VdfxYYNG3Ds2DGkpaXhzjvvxIABA7By5UoAwNNPP42pU6diyJAhqK6uxosvvohNmzYhNzcXI0eObPXzOCsUdXc2lw/7CgNjMfYWVKH6vJsnDUo0BrtMjegXx+kGiRrw+gNVibqB199XO0NeT4rRBsc2jc2Mh1Hb4cI9UY/jV1QcL7UGx2acLrOHvB6nlzGhdqrk8QMTYDZootRSaqjXd4Xav38/3njjDYiiCEmS4Ha7ccEFF+CFF17AvHnz2pVY3HrrrSgrK8NTTz2FkpISjBs3Dlu2bAkO6C4oKIAo1p84VVVV4d5770VJSQkSEhIwceJE7Ny5s01JBVFPYNLLuHxoCi4fmhKcbjC3tuvG8RIrzlQ6cKbSgb/t/R5GrYSxGfVjMzjdIPVF5yyu2htYVuHg99VweeurEqIAjOgXh0mDEjFpUAIGJbEqQX2XJAoY2S8OI/vF4c7swai0B6o
ZOWcqsa+wGhaXD9u/K8P278ogABiWFhu8C/iFqSaI/O1QKzpUsUhJScHOnTsxdOhQDBs2DK+99hpmzZqFY8eOYeLEibDb7a2/SZSwYkE9mcXpxb7CauTW3pzL4godoJeVHBPsI96W6QYlUYAsCvApKvxKVO+V2aNxPUZGW9ej16/gaLEFOfmB7h2FlY6Q1xON2mD3prGZ8TDpWJUgao3Pr+BYibW2W24l8itCf1fxBk2DakY8YvWtVzO4bwyfJAqocngwJNWEtChN7NLpd96+5pprcNddd+GnP/0p7r33Xhw8eBAPPfQQ3n//fVRVVbVpNqdoYWJBvYWiqjh5zhacaepEqS3k5kkxWgnjBgauNJ0/3aBOFhEfo0G8QYMapxdmgwbVDi+qHV64fZGfWrq34nqMjLasxzKru/aO95U4UFgDp7f+fkmiAFyUHhe8spqVHMOqBFGYym3u4AQj+wurG/3mhqfFYmLtFMwXpMSEVDO4bwxfw3VY7fAi3qiFT1Gi0n2z0xOLnJwcWK1WzJw5E+fOncOdd94ZrGC88847GDt2bIcb39mYWFBvVTfdYO6ZwM2TmppucOKgRMwcnoLpw1OwdscprNuZD4vThziDjPnTsvCL6RfgbKWTO/420MkiMhINXI9hamk93nv5BViz7QT+eagEZ86rSsQbA1dPJw1KwPjMBJj0rEoQdRavX8G3xZbg2KWC836PCXW/x8GJyL4gCcPSTdw3hqGl/eL9My6Erovvo9PpiUVPxsSC+gK/ouLEOWtwpqmT52zB1966cyIOnq3Ba583nqL54f8ZilsnZYbEU9OGpJqwPqcAf9zK9RiOltbjwiuH4OIBZix4PxeiEOjvXTd7zflXSImo65yzuoLVjANnQ8c18RgTvtaOL7+YfkGXVi46PbH4/e9/j9tvvx1ZWVkdbmS0MLGgvqjK4cG+giocL7XipZ+MxdSVW2Fx+hrFxRlk7F76P7js+W2otHuaeCcCgMQYLb58bCbXY5jash6/XnoV/vr1GYxIj0McZ6gh6na8fgVHiyzIOVOJU+ds+PCX2dw3hqEt+8Wc314Nrdx154+dPivUxo0bsWzZMkyZMgU/+9nPcMsttyA5OblDjSWizpdg1OLKi9Iwe3Q/2N2+JndWAGBx+lBp9yDVpEMVd/rNSjXpUGHzcD2GqS3r0eb24YqhKew6QdRNaSQRYzPjMTYzHjpZhM3FY0w42rJftLq83XYWyA4lFgcOHMCRI0fw17/+FS+99BIWLVqEq6++GrfffjvmzJkDo9EY6XYSUQT4FBVmgwZxBrnZKyGpsTq8fOs4zuDRAkkUkBan43oMU1vWo9kgo8zqjkLriKi9fIqKeCOPMeFoy36xLTNyRUuH6yijRo3CihUrcPr0aWzbtg2DBw/GokWLkJ6eHsn2EVEE+RUV1U4v5k9ruhvj/GlZqHZ4ucNvBddjZHA9EvUu/E2Hry3r0Kd03wpuREZ+xMTEwGAwQKvVwmq1RuItiaiTVNu9+MX0CwAA7+7Ma3LGDmod12NkcD0S9S78TYevpXUYjVmh2qPDs0Ll5eXhgw8+wAcffIDjx49j+vTp+OlPf4qbb74ZZrM50u2MGA7eJqqdH9uoQbxRgxqnD2aDzDnGO4DrMTK4Hol6F/6mw9dwHVY7vYg39OL7WEydOhV79uzBmDFjcPvtt2Pu3LkYMGBAhxvclZhYENXjXVEjg+sxMrgeiXoX/qbD19PuvN2htOd//ud/8M4772DkyJHoY7fBIOpV/NzZRwTXY2RwPRL1LvxNh8+vqLC5fegpp9sdulz+7LPPYteuXRg9ejT0ej30ej1Gjx6Nt99+O9LtIyIiIiKiHqBDFYunnnoKL7/8MhYuXIjs7GwAwK5du/DII4+goKAATz/9dEQbSURERERE3VuHEovXX38db731FubOnRt87oYbbsCYMWOwcOFCJhZERERERH1Mh7pCeb1eTJo0qdHzEydOhM/X9J0CiYiIiIio9+p
QYnHHHXfg9ddfb/T8m2++idtvvz3sRhERERERUc/S4clw//znP+Pf//43pk6dCgD4+uuvUVBQgDvvvBOLFy8Oxr388svht5KIiIiIiLq1DiUWhw8fxoQJEwAAp06dAgAkJycjOTkZhw8fDsYJghCBJhIRERERUXfXocRi27ZtkW4HERERERH1YLztMxERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERhY2JBRERERERha1DN8ij8AmCgHNWF0TenTxsBo2EGJ0MjcQ8mYj6Lo9Pgd3jg9PrB48s4VOhQidJMGpl6DUiBB6viVrFxCIKDBoJw9JioahqtJvS47m8fpTbPKhyeuD1K9CKEmJ0EvQaiUkbEfVqiqrC6fHD7vHB61ehlQWYdDIyEgzQyrzQEi5VBcptblicXlQ5FUiCAINGglErQeaFLKImMbGIAkkU0D/eEO1m9BpZySpsbh+sLi8qbB7UOL2ocXoBAEatDKNWYjWDiHoFr1+B3e2Do7YqYdBKSI3TISlGB5NehkkrQxR5USVSMhONcHr8sLq8qHYEjjEVdg98igq9LLKaQXQeJhbU40miALNBA7NBg4wEI1xeP6wuH2qcnpBqRqCkLcGgkXgQIKIeoa4q4fD44fUr0MgCjFoZQ+INMBs1MOlk6DVStJvZqxm0Um0Cp4fPr9ReyPIFqxmVjkA1w6hlNYOIiQX1OnpNoCtUSqyuyWpGtcMDURRg0LCaQUTdT11Vwun1Awic2CaZtEiO1SGWVYmokiUR8UYt4o1aZCQY4Ky9kFXt8DSqZsToZOhkVjOob2FiQb1aU9UMi8uLGocXFXYPKh0e+PwKdDKrGUQUHYqqwuX1w+72w6P4oZFExGhl9IvXw2zQIlbPqkR3JAhCbXdbGWm11Qyryweb24dzVhesLh8qHQpEQUCMNjAIXGJCSL0cEwvqU+qqGamxelygqLC5fLC4vKi0e1Dt9KDa6YUogNUMIupUXr8Ch8cPh8cHVVVh0MpIMmmRZNIiVh/o4sST0J5FlkQkxGiREBNazaiyByoZ5TY3/IoKfe0AcCaL1BsxsaA+SxIFmI0amI2a+gF67kA1o9xaX83Qy5xukIjCo6oqnN7AWAm3v74qcUFKDKsSvdD51QyvX4HNFRibUWarq2a4IQoiqxnUqzCxIKoVHKAXq0dWsgK7O9BtqtzmhsVVP92gXiMhhgP0iKgVdVUJp9cHRQlUJRJjWJXoizQNqhmZiQY4PLXVDIcHlXYPyqxu+FUlWC1nkkk9FRMLoibIkgizUQytZpw33aBfVaGTON0gEQWEVCV8fsiSCJNOxuC4GJiNGsTpNTxhJAiCgBidjBidjHRzoJphdQUmGSmzumGrrWZIQqCqZdBKTECpx2BiQdQGnG6QiJoSUpVQAb1GRGKMFokxWsTpNTDpWZWglmmk+m1mYKIRdo8/mFxU2gPJhgIFBjmQZDA5pe6MiQVRO3G6QaK+S1VVuLwKHB4fXD4/ZDFwVXlwUgzMBg1i9RoYtDzxo44RhMDd00211QyPL3Ahy1J7Xyaby4dKuxuSyGoGdU9MLIjC0NR0g3XVjDKbC1Zn/XSDRo2EGPapJupxfH4Fdo8fLq8fflWFXhPoJnmhyYQ4vQYxOlYpqXNoZRGJcqCaMShJhb22W26lzYMqpxdlNhcUVYVBlmHUSdDJTGopuphYEEVQc9WMKntggF7ddIM6VjOIuq2GVQm3P9DNMUYrY2CiEfFGViUoOhpWM/qZDfD4FFhd3sDYDJsncCNYnxuyKMKok2HQsJpBXY+JBVEnadt0g35ON0jUDfiC95UIVCUMtVWJZFPt3a51MqsS1K1oZRFJJh2STDoMSlJhcwduzldp86DK4UGZzQtFCYwRNGpZzaCuwcSCqItwukGi7kNVVbh9CuxuH1w+BbIYmHxhYJIB8cbAfSUMGokVReoRBEFArD5QTetnNsD
tCwwAtzi9KLO5A9UMv6d2Ow8cY0Ru29QJmFgQRUF7phs0sppBFBF+RYXd7YPD64eiBiZYiDPKuNCkh0knI1bPqgT1DjpZgs4k1VYzYmD3BKrllXYPqh0elFq8UNVANSNGK0Mrc7unyOi7iYXdDkhNXBGWJECvD41rjigCBkPHYh0OQFWbjhUEwGjsWKzTCShK8+2IielYrMsF+P2RiTUaA+0GALcb8PkiE2swBNYzAHg8gNcbmVi9vn5baU+s1xuIb45OB8iBn6BG8SMRXiTqgYE6LRweKZBcON2otrtQbpfhl0XoZRlGUYVBbX49qBotoNEE/vD5IHjcbYv1+yG4Xc23V9ZA1WrbH6soEFzOyMRKMlSdrrbxKgSnIzKxogS1we9ecLT0W25HrCBCbfC7b1ess+XfvWowdjDWCajN/+5VY0zHYl0uQGn+d9+uWEP9715wuwF/C9t7C7GqqsJTW5Xw+FTAaIBRr0FmggEJGiBWUmurEioAL+Bq8NvuZvuIdsX6fIH9ZXO0ofuINsf6/YH9e3M0mkB8e2MVJXA8ikSsLAfWBRD4TTha+N23J7Y95wbd7DxCFAXEKl7EalT0j5fhNgmBC1lOLyrsTljtfrg0BmikwFgio+KB2Nz7onftIxrF6ut/94LHA/ia/y23K1ZX/7tvTyy8XgheDySHC4JdA8jnrZOu2ke0tF02+gJ9TE1NjQpArQn8HBs/rr02dAGjsek4QFWnTw+NTU5uPnbSpNDYQYOajx05MjR25MjmYwcNCo2dNKn52OTk0Njp05uPNRpDY6+9tvnY8zejm29uOdZmq4+dN6/l2HPn6mPvv7/l2Ly8+tglS1qOPXy4PnbZspZjv/mmPvaFF1qO3batPnb16pZjP/mkPvbdd1uMtfzvB2pemVXdk1ehHn7lrRZjC/6wRj1YWK0eLKxW89Z92GLs2WdeDMae2vCPFmOLfvt0MPbEPz5vMbbkkceCscf/s6vF2HO/WBiM/XbngRZjy++8Jxh7ZP/JFmMrb54bjD10/PsWY6uv+1Ew9mBhdYuxNVdeExLrNzS/j7BOvTQk1puY1Gysfcz4kFh3Rmazsc5hF4XEOodd1GysOyMzJNY+Znyzsd7EpJBY69RLm431G4whsTVXXtPiemsYW33dj1qMPXT8+2Bs5c1zW4w9sv9kMLb8znta/nc+clz1+Py9ch+hbthQH7thQ8ux775bH/vJJy3Hrl5dH7ttW8uxL7xQH/vNNy3HLltWH3v4cMuxS5bUx+bltRx7//31sefOtRw7b159rM3WcuzNN6shWortYecR/oGD1O+rHOrBs1XqF9+dU6tHjW02trfvI77deSAYe+4XC1uMPf6fXcHYkkceazH2xD8+D8YW/fbpFmNPbfhHMPbsMy+2vK110T6iBlABqDU1NWpr+m7FgqgHidXLiE02YVCSCleqKdrNIepWXLV3u45ze5HUQpzZqAXY1YkohCgA/eMN6B9vgMvrh6TjqSF1nBBIvPsOi8UCs9mMmqIixMXFNQ7oZiXMdseyK1Tg/3tYV6iOljDrphu0ubwot3tgc3nhFmRIOl1gukFBhexr/n3ZFaoWu0IF9YRuDoqiwunxw+7xwavXQyfLMOllpGiBWFlArF6GpqkEog/uI5rErlDtj+3BXaHaHVt7bqAoKmweH2wuHypsblhcXrg8CnxGY/C+TFqPu1vuIzoc2027QpXUuHDxADPSzPrQ4C7aR1jKymDu3x81NTVNnzs30C0SizVr1uDFF19ESUkJxo4di9deew2TJ09uNn7jxo148sknkZ+fj6FDh+L555/Htdde26bPCiYWbVg5RD2JqjaebtDp9QemG9RIvHkS9Wh1VQmXzwcRIow6CQkxGiQaA9PBGrWcwYmoM7lq78tUU3sXcLvHB69PgVaSEKMLzGTImaY6R1GNE2Mz4pF+fmLRRdpz7hz1eteHH36IxYsXY+3atZgyZQpeffVVzJo1C8ePH0dqamqj+J0
7d2Lu3LlYuXIlfvjDH+KDDz7AnDlzsHfvXowePToK34Coe2hpusFyu6d+ukFB4M2TqNvzN6hK+BUFOlmCSS9jUJIBcQYtTDrOZEPUlfSaQPKQEqtDVnLgQpbV5UWFzYMapxc1zsBV+LrpbJusGlKvF/WKxZQpU3DJJZdg9erVAABFUZCZmYmFCxfi8ccfbxR/6623wm6345NPPgk+N3XqVIwbNw5r165tFO92u+FuUO6xWCzIzMxkxYL6FEVRg9MNVtkD1QyHxw9VDUw3qBF5AKDuwetX4PT5IAoCDFoZiTEaJBi1iNVrEMOqBFG31GQ1w69AJ0nQySIE8HcbjmqXB+MzE1ixaI3H40Fubi6WLl0afE4URVx11VXYtWtXk8vs2rULixcvDnlu1qxZ2LRpU5PxK1euxPLlyyPWZqKeSBTrqxn94wPVjLrpBstsbvj8Ue8RSQQAMOgCN6mr215ZlSDq/lqqZtjdPqjgMSYcCUYtNFLPSM6imliUl5fD7/cjLS0t5Pm0tDQcO3asyWVKSkqajC8pKWkyfunSpSGJSF3Fgqgvq7t5UrJJh8HJMVC4z6duQhTAqgRRDyaJAswGDcwGDTISjPDzABMRPaXrctTHWHQ2nU4HXd2sD0TUiCAI6CEXQoiIqIfpKSfEFBlRrTEnJydDkiSUlpaGPF9aWor09PQml0lPT29XPBERERERdb6oJhZarRYTJ07E1q1bg88pioKtW7ciOzu7yWWys7ND4gHgs88+azaeiIiIiIg6X9S7Qi1evBjz5s3DpEmTMHnyZLz66quw2+2YP38+AODOO+/EgAEDsHLlSgDAww8/jOnTp+MPf/gDrrvuOqxfvx45OTl488032/R5dZNgWSyWzvlCRERERES9RN05c5smklW7gddee00dOHCgqtVq1cmTJ6u7d+8OvjZ9+nR13rx5IfEbNmxQhw0bpmq1WnXUqFHq5s2b2/xZhYWFKgA++OCDDz744IMPPvjgo42PwsLCVs+zo34fi66mKAqKiooQGxvb5TOP1M1IVVhYyHtoUIdxO6JI4HZE4eI2RJHA7aj7U1UVVqsV/fv3h9jKfa+i3hWqq4miiIyMjKi2IS4ujj8eChu3I4oEbkcULm5DFAncjro3s9ncpjjeeYiIiIiIiMLGxIKIiIiIiMLGxKIL6XQ6LFu2jDfso7BwO6JI4HZE4eI2RJHA7ah36XODt4mIiIiIKPJYsSAiIiIiorAxsSAiIiIiorAxsSAiIiIiorAxsSAiIiIiorAxsYiwNWvWYPDgwdDr9ZgyZQq++eabFuM3btyIiy66CHq9HhdffDH++c9/dlFLqTtrz3a0bt06CIIQ8tDr9V3YWupuvvjiC1x//fXo378/BEHApk2bWl1m+/btmDBhAnQ6HYYMGYJ169Z1ejupe2vvdrR9+/ZG+yJBEFBSUtI1DaZuZ+XKlbjkkksQGxuL1NRUzJkzB8ePH291OZ4b9VxMLCLoww8/xOLFi7Fs2TLs3bsXY8eOxaxZs3Du3Lkm43fu3Im5c+fi7rvvxr59+zBnzhzMmTMHhw8f7uKWU3fS3u0ICNyxtLi4OPg4c+ZMF7aYuhu73Y6xY8dizZo1bYrPy8vDddddh5kzZ2L//v1YtGgR7rnnHnz66aed3FLqztq7HdU5fvx4yP4oNTW1k1pI3d2OHTvwwAMPYPfu3fjss8/g9XpxzTXXwG63N7sMz416Nk43G0FTpkzBJZdcgtWrVwMAFEVBZmYmFi5ciMcff7xR/K233gq73Y5PPvkk+NzUqVMxbtw4rF27tsvaTd1Le7ejdevWYdGiRaiuru7illJPIAgCPvroI8yZM6fZmMceewybN28OOXDfdtttqK6uxpYtW7qgldTdtWU72r59O2bOnImqqirEx8d3Wduo5ygrK0Nqaip27NiBK664oskYnhv1bKxYRIjH40Fubi6uuuqq4HOiKOKqq67Crl27mlxm165dIfEAMGvWrGbjqffryHY
EADabDYMGDUJmZiZ+9KMf4ciRI13RXOoluC+iSBo3bhz69euHq6++Gl999VW0m0PdSE1NDQAgMTGx2Rjuj3o2JhYRUl5eDr/fj7S0tJDn09LSmu1fWlJS0q546v06sh0NHz4c77zzDv7+97/jL3/5CxRFwbRp03D27NmuaDL1As3tiywWC5xOZ5RaRT1Nv379sHbtWvztb3/D3/72N2RmZmLGjBnYu3dvtJtG3YCiKFi0aBEuvfRSjB49utk4nhv1bHK0G0BE4cnOzkZ2dnbw72nTpmHEiBF444038Mwzz0SxZUTUlwwfPhzDhw8P/j1t2jScOnUKr7zyCt5///0otoy6gwceeACHDx/Gl19+Ge2mUCdixSJCkpOTIUkSSktLQ54vLS1Fenp6k8ukp6e3K556v45sR+fTaDQYP348Tp482RlNpF6ouX1RXFwcDAZDlFpFvcHkyZO5LyI8+OCD+OSTT7Bt2zZkZGS0GMtzo56NiUWEaLVaTJw4EVu3bg0+pygKtm7dGnI1uaHs7OyQeAD47LPPmo2n3q8j29H5/H4/Dh06hH79+nVWM6mX4b6IOsv+/fu5L+rDVFXFgw8+iI8++giff/45srKyWl2G+6MeTqWIWb9+varT6dR169apR48eVRcsWKDGx8erJSUlqqqq6h133KE+/vjjwfivvvpKlWVZfemll9Rvv/1WXbZsmarRaNRDhw5F6ytQN9De7Wj58uXqp59+qp46dUrNzc1Vb7vtNlWv16tHjhyJ1legKLNareq+ffvUffv2qQDUl19+Wd23b5965swZVVVV9fHHH1fvuOOOYPzp06dVo9GoPvroo+q3336rrlmzRpUkSd2yZUu0vgJ1A+3djl555RV106ZN6okTJ9RDhw6pDz/8sCqKovqf//wnWl+Bouy+++5TzWazun37drW4uDj4cDgcwRieG/UuTCwi7LXXXlMHDhyoarVadfLkyeru3buDr02fPl2dN29eSPyGDRvUYcOGqVqtVh01apS6efPmLm4xdUft2Y4WLVoUjE1LS1OvvfZade/evVFoNXUX27ZtUwE0etRtN/PmzVOnT5/eaJlx48apWq1WveCCC9R33323y9tN3Ut7t6Pnn39evfDCC1W9Xq8mJiaqM2bMUD///PPoNJ66haa2HwAh+xeeG/UuvI8FERERERGFjWMsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIiIiIgobEwsiIgoqjweD4YMGYKdO3cCAPLz8yEIAvbv3x+V9jz++ONYuHBhVD6biKgnY2JBRNRHlZWV4b777sPAgQOh0+mQnp6OWbNm4auvvgrGDB48GIIgYPfu3SHLLlq0CDNmzAj+/bvf/Q6CIEAQBEiShMzMTCxYsACVlZWttmPt2rXIysrCtGnTIvbdAGDGjBlYtGhRu5dbsmQJ3nvvPZw+fTqi7SEi6u2YWBAR9VE33XQT9u3bh/feew/fffcdPv74Y8yYMQMVFRUhcXq9Ho899lir7zdq1CgUFxejoKAA7777LrZs2YL77ruvxWVUVcXq1atx9913h/VdIik5ORmzZs3C66+/Hu2mEBH1KEwsiIj6oOrqavz3v//F888/j5kzZ2LQoEGYPHkyli5dihtuuCEkdsGCBdi9ezf++c9/tviesiwjPT0dAwYMwFVXXYWf/OQn+Oyzz1pcJjc3F6dOncJ1113X6LVjx45h2rRp0Ov1GD16NHbs2BHy+uHDhzF79myYTCakpaXhjjvuQHl5OQDgrrvuwo4dO7Bq1apgJSU/Px9+vx933303srKyYDAYMHz4cKxatarRZ19//fVYv359i20nIqJQTCyIiPogk8kEk8mETZs2we12txiblZWFX/7yl1i6dCkURWnT++fn5+PTTz+FVqttMe6///0vhg0bhtjY2EavPfroo/jVr36Fffv2ITs7G9dff32
wmlJdXY0rr7wS48ePR05ODrZs2YLS0lLccsstAIBVq1YhOzsb9957L4qLi1FcXIzMzEwoioKMjAxs3LgRR48exVNPPYXf/OY32LBhQ8hnT548GWfPnkV+fn6bvi8RETGxICLqk2RZxrp16/Dee+8hPj4el156KX7zm9/g4MGDTcY/8cQTyMvLw1//+tdm3/PQoUMwmUwwGAzIysrCkSNHWu1CdebMGfTv37/J1x588EHcdNNNGDFiBF5//XWYzWb8+c9/BgCsXr0a48ePx4oVK3DRRRdh/PjxeOedd7Bt2zZ89913MJvN0Gq1MBqNSE9PR3p6OiRJgkajwfLlyzFp0iRkZWXh9ttvx/z58xslFnVtOnPmTIvtJyKiekwsiIj6qJtuuglFRUX4+OOP8YMf/ADbt2/HhAkTsG7dukaxKSkpWLJkCZ566il4PJ4m32/48OHYv38/9uzZg8ceewyzZs1qdXYlp9MJvV7f5GvZ2dnB/5dlGZMmTcK3334LADhw4AC2bdsWrLyYTCZcdNFFAIBTp061+Jlr1qzBxIkTkZKSApPJhDfffBMFBQUhMQaDAQDgcDhafC8iIqrHxIKIqA/T6/W4+uqr8eSTT2Lnzp246667sGzZsiZjFy9eDKfTiT/96U9Nvq7VajFkyBCMHj0azz33HCRJwvLly1v8/OTkZFRVVbW73TabDddffz32798f8jhx4gSuuOKKZpdbv349lixZgrvvvhv//ve/sX//fsyfP79RslQ3m1VKSkq720ZE1FcxsSAioqCRI0fCbrc3+ZrJZMKTTz6JZ599FlartdX3euKJJ/DSSy+hqKio2Zjx48fj2LFjUFW10WsNp7j1+XzIzc3FiBEjAAATJkzAkSNHMHjwYAwZMiTkERMTAyCQ6Pj9/pD3/OqrrzBt2jTcf//9GD9+PIYMGdJkhePw4cPQaDQYNWpUq9+TiIgCmFgQEfVBFRUVuPLKK/GXv/wFBw8eRF5eHjZu3IgXXngBP/rRj5pdbsGCBTCbzfjggw9a/Yzs7GyMGTMGK1asaDZm5syZsNlsOHLkSKPX1qxZg48++gjHjh3DAw88gKqqKvz85z8HADzwwAOorKzE3LlzsWfPHpw6dQqffvop5s+fH0wmBg8ejK+//hr5+fkoLy+HoigYOnQocnJy8Omnn+K7777Dk08+iT179jT67P/+97+4/PLLg12iiIiodUwsiIj6IJPJhClTpuCVV17BFVdcgdGjR+PJJ5/Evffei9WrVze7nEajwTPPPAOXy9Wmz3nkkUfw9ttvo7CwsMnXk5KS8OMf/7jJQeHPPfccnnvuOYwdOxZffvklPv74YyQnJwMIDK7+6quv4Pf7cc011+Diiy/GokWLEB8fD1EMHNqWLFkCSZIwcuRIpKSkoKCgAL/4xS9w44034tZbb8WUKVNQUVGB+++/v9Fnr1+/Hvfee2+bviMREQUIalP1ZyIioi5y8OBBXH311Th16hRMJlO0m4N//etf+NWvfoWDBw9CluVoN4eIqMdgxYKIiKJqzJgxeP7555GXlxftpgAA7HY73n33XSYVRETtxIoFERERERGFjRULIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIKGxMLIiIiIiIK2/8HVEOMSDr/O/AAAAAASUVORK5CYII=", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -593,23 +912,23 @@ } ], "source": [ - "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", "\n", - "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X500\"]):\n", " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", "\n", " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", - " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (beta)\")\n", " ax.legend()\n", - "fig.suptitle(\"Linear ANCOVA model with Permutation Forest\")\n", + "fig.suptitle(\"Correlated Logit model with Coleman Forest (permute per tree)\")\n", "fig.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, - "id": "34294429-04f3-4b12-baf3-fa6fdc11646f", + "id": "0f1ea3aa-ebad-4ff5-ae7b-abe3aa8308b6", "metadata": {}, "outputs": [], "source": [] diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index 0061cd66a..95702f9e5 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,3 +1,3 @@ from ._might import MIGHT, MIGHT_MV -from .forestht import FeatureImportanceForestRegressor, ForestHT +from .forestht import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor, ForestHT from .permutationforest import PermutationForestClassifier, PermutationForestRegressor diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 3ad77c8f8..a761c8660 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -8,9 +8,10 @@ BaseForest, ForestClassifier, ForestRegressor, + RandomForestClassifier, RandomForestRegressor, ) -from sktree._lib.sklearn.tree import DecisionTreeRegressor +from sktree._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from ..ensemble import HonestForestClassifier from 
.utils import ( @@ -1171,3 +1172,578 @@ def test( self.null_dist_ = null_dist return observe_stat, pvalue + + +class FeatureImportanceForestClassifier(BaseForestHT): + """Forest hypothesis testing with categorical `y` variable. + + The dataset is split into a training and testing dataset initially. Then there + are two forests that are trained: one on the original dataset, and one on the + permuted dataset. The dataset is either permuted once, or independently for + each tree in the permuted forest. The original test statistic is computed by + comparing the metric on both forests ``(metric_forest - metric_perm_forest)``. + + Then the output predictions are randomly sampled to recompute the test statistic + ``n_repeats`` times. The p-value is computed as the proportion of times the + null test statistic is greater than the original test statistic. + + Parameters + ---------- + estimator : object, default=None + Type of forest estimator to use. By default `None`, which defaults to + :class:`sklearn.ensemble.RandomForestRegressor`. + + n_estimators : int, default=100 + The number of trees in the forest. + + criterion : {"gini", "entropy"}, default="gini" + The function to measure the quality of a split. Supported criteria are + "gini" for the Gini impurity and "entropy" for the information gain. + Note: this parameter is tree-specific. + + splitter : {"best", "random"}, default="best" + The strategy used to choose the split at each node. Supported + strategies are "best" to choose the best split and "random" to choose + the best random split. + + max_depth : int, default=None + The maximum depth of the tree. If None, then nodes are expanded until + all leaves are pure or until all leaves contain less than + min_samples_split samples. + + min_samples_split : int or float, default=2 + The minimum number of samples required to split an internal node: + + - If int, then consider `min_samples_split` as the minimum number. 
+ - If float, then `min_samples_split` is a fraction and + `ceil(min_samples_split * n_samples)` are the minimum + number of samples for each split. + + min_samples_leaf : int or float, default=1 + The minimum number of samples required to be at a leaf node. + A split point at any depth will only be considered if it leaves at + least ``min_samples_leaf`` training samples in each of the left and + right branches. This may have the effect of smoothing the model, + especially in regression. + + - If int, then consider `min_samples_leaf` as the minimum number. + - If float, then `min_samples_leaf` is a fraction and + `ceil(min_samples_leaf * n_samples)` are the minimum + number of samples for each node. + + min_weight_fraction_leaf : float, default=0.0 + The minimum weighted fraction of the sum total of weights (of all + the input samples) required to be at a leaf node. Samples have + equal weight when sample_weight is not provided. + + max_features : {"sqrt", "log2", None}, int or float, default="sqrt" + The number of features to consider when looking for the best split: + + - If int, then consider `max_features` features at each split. + - If float, then `max_features` is a fraction and + `round(max_features * n_features)` features are considered at each + split. + - If "auto", then `max_features=sqrt(n_features)`. + - If "sqrt", then `max_features=sqrt(n_features)`. + - If "log2", then `max_features=log2(n_features)`. + - If None, then `max_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires to + effectively inspect more than ``max_features`` features. + + max_leaf_nodes : int, default=None + Grow trees with ``max_leaf_nodes`` in best-first fashion. + Best nodes are defined as relative reduction in impurity. + If None then unlimited number of leaf nodes. 
+ + min_impurity_decrease : float, default=0.0 + A node will be split if this split induces a decrease of the impurity + greater than or equal to this value. + + The weighted impurity decrease equation is the following:: + + N_t / N * (impurity - N_t_R / N_t * right_impurity + - N_t_L / N_t * left_impurity) + + where ``N`` is the total number of samples, ``N_t`` is the number of + samples at the current node, ``N_t_L`` is the number of samples in the + left child, and ``N_t_R`` is the number of samples in the right child. + + ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, + if ``sample_weight`` is passed. + + bootstrap : bool, default=True + Whether bootstrap samples are used when building trees. If False, the + whole dataset is used to build each tree. + + oob_score : bool, default=False + Whether to use out-of-bag samples to estimate the generalization score. + Only available if bootstrap=True. + + n_jobs : int, default=None + The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, + :meth:`decision_path` and :meth:`apply` are all parallelized over the + trees. ``None`` means 1 unless in a `joblib.parallel_backend` + context. ``-1`` means using all processors. See :term:`Glossary + ` for more details. + + random_state : int, RandomState instance or None, default=None + Controls both the randomness of the bootstrapping of the samples used + when building trees (if ``bootstrap=True``) and the sampling of the + features to consider when looking for the best split at each node + (if ``max_features < n_features``). + See :term:`Glossary ` for details. + + verbose : int, default=0 + Controls the verbosity when fitting and predicting. + + warm_start : bool, default=False + When set to ``True``, reuse the solution of the previous call to fit + and add more estimators to the ensemble, otherwise, just fit a whole + new forest. See :term:`the Glossary `. 
+ + class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ + default=None + Weights associated with classes in the form ``{class_label: weight}``. + If not given, all classes are supposed to have weight one. For + multi-output problems, a list of dicts can be provided in the same + order as the columns of y. + + Note that for multioutput (including multilabel) weights should be + defined for each class of every column in its own dict. For example, + for four-class multilabel classification weights should be + [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of + [{1:1}, {2:5}, {3:1}, {4:1}]. + + The "balanced" mode uses the values of y to automatically adjust + weights inversely proportional to class frequencies in the input data + as ``n_samples / (n_classes * np.bincount(y))`` + + The "balanced_subsample" mode is the same as "balanced" except that + weights are computed based on the bootstrap sample for every tree + grown. + + For multi-output, the weights of each column of y will be multiplied. + + Note that these weights will be multiplied with sample_weight (passed + through the fit method) if sample_weight is specified. + + ccp_alpha : non-negative float, default=0.0 + Complexity parameter used for Minimal Cost-Complexity Pruning. The + subtree with the largest cost complexity that is smaller than + ``ccp_alpha`` will be chosen. By default, no pruning is performed. See + :ref:`minimal_cost_complexity_pruning` for details. + + max_samples : int or float, default=None + If bootstrap is True, the number of samples to draw from X + to train each base tree estimator. + + - If None (default), then draw `X.shape[0]` samples. + - If int, then draw `max_samples` samples. + - If float, then draw `max_samples * X.shape[0]` samples. Thus, + `max_samples` should be in the interval `(0.0, 1.0]`. + + Attributes + ---------- + samples_ : ArrayLike of shape (n_samples,) + The indices of the samples used in the final test. 
+ + y_true_final_ : ArrayLike of shape (n_samples_final,) + The true labels of the samples used in the final test. + + posterior_final_ : ArrayLike of shape (n_samples_final,) + The predicted posterior probabilities of the samples used in the final test. + + null_dist_ : ArrayLike of shape (n_repeats,) + The null distribution of the test statistic. + """ + + def __init__( + self, + estimator=None, + n_estimators=100, + criterion="gini", + max_depth=None, + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_features="sqrt", + max_leaf_nodes=None, + min_impurity_decrease=0.0, + bootstrap=False, + oob_score=False, + n_jobs=None, + random_state=None, + verbose=0, + warm_start=False, + ccp_alpha=0.0, + max_samples=None, + permute_per_tree=True, + sample_dataset_per_tree=False, + **estimator_kwargs, + ): + super().__init__( + estimator=estimator, + n_estimators=n_estimators, + criterion=criterion, + max_depth=max_depth, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_features=max_features, + max_leaf_nodes=max_leaf_nodes, + min_impurity_decrease=min_impurity_decrease, + bootstrap=bootstrap, + oob_score=oob_score, + n_jobs=n_jobs, + random_state=random_state, + verbose=verbose, + warm_start=warm_start, + ccp_alpha=ccp_alpha, + max_samples=max_samples, + **estimator_kwargs, + ) + self.permute_per_tree = permute_per_tree + self.sample_dataset_per_tree = sample_dataset_per_tree + + def _get_estimator(self): + if self.estimator is None: + estimator_ = RandomForestClassifier( + n_estimators=self.n_estimators, + criterion=self.criterion, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + min_impurity_decrease=self.min_impurity_decrease, + bootstrap=self.bootstrap, + 
oob_score=self.oob_score, + n_jobs=self.n_jobs, + random_state=self.random_state, + verbose=self.verbose, + warm_start=self.warm_start, + ccp_alpha=self.ccp_alpha, + max_samples=self.max_samples, + **self.estimator_kwargs, + ) + elif isinstance(self.estimator, ForestClassifier): + raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator)}") + else: + estimator_ = self.estimator + return estimator_ + + def _statistic( + self, + estimator: BaseForest, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + return_posteriors: bool = False, + **metric_kwargs, + ): + """Helper function to compute the test statistic.""" + metric_func = METRIC_FUNCTIONS[metric] + rng = np.random.default_rng(self.random_state) + n_samples = X.shape[0] + + if self.permute_per_tree and not self.sample_dataset_per_tree: + # first run a dummy fit on the samples to initialize the + # internal data structure of the forest + if not _is_fitted(estimator): + unique_y = np.unique(y) + X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) + + # Fit each tree and compute posteriors with train test splits + n_samples_test = len(self.indices_test_) + + # now initialize posterior array as (n_trees, n_samples_test, n_outputs) + posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_outputs_)) + for idx in range(self.n_estimators): + tree: DecisionTreeClassifier = estimator.estimators_[idx] + train_tree( + tree, X[self.indices_train_, :], y[self.indices_train_, :], covariate_index + ) + + y_pred = tree.predict(X[self.indices_test_, :]).reshape(-1, tree.n_outputs_) + + # Fill test set posteriors & set rest NaN + posterior_arr[idx, ...] 
= y_pred # posterior + + y_true_final = y[self.indices_test_, :] + # Average all posteriors + posterior_final = np.nanmean(posterior_arr, axis=0) + samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() + elif self.permute_per_tree and self.sample_dataset_per_tree: + # first run a dummy fit on the samples to initialize the + # internal data structure of the forest + if not _is_fitted(estimator): + unique_y = np.unique(y) + X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) + + # now initialize posterior array as (n_trees, n_samples, n_outputs) + posterior_arr = np.full((self.n_estimators, n_samples, estimator.n_outputs_), np.nan) + # Fit each tree and compute posteriors with train test splits + for idx in range(self.n_estimators): + # sample train/test dataset for each tree + indices_train, indices_test = train_test_split( + np.arange(n_samples, dtype=int), + test_size=self.test_size_, + shuffle=True, + random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), + ) + tree: DecisionTreeClassifier = estimator.estimators_[idx] + train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) + + y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) + + posterior_arr[idx, indices_test, :] = y_pred # posterior + + # Average all posteriors + posterior_final = np.nanmean(posterior_arr, axis=0) + + # Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + + # Ignore all NaN values (samples not tested) + y_true_final = y[nonnan_indices, :] + posterior_final = posterior_final[nonnan_indices, :] + samples = nonnan_indices + else: + X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] + y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] + + if covariate_index is not None: + # perform permutation of covariates + n_samples_train = X_train.shape[0] + index_arr = rng.choice( + 
np.arange(n_samples_train, dtype=int), + size=(n_samples_train, 1), + replace=False, + shuffle=True, + ) + X_train[:, covariate_index] = X_train[index_arr, covariate_index] + + estimator.fit(X_train, y_train) + y_pred = estimator.predict(X_test) + + # set variables to compute metric + samples = self.indices_test_ + y_true_final = y_test + posterior_final = y_pred + + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) + if covariate_index is None: + # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) + # arrays of y and predicted posterior + self.samples_ = samples + self.y_true_final_ = y_true_final + self.posterior_final_ = posterior_final + self.stat_ = stat + + if return_posteriors: + return stat, posterior_final, samples + + return stat + + def statistic( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + return_posteriors: bool = False, + check_input: bool = True, + **metric_kwargs, + ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "auc". + test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. 
+ samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. ``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. + """ + if check_input: + X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + if metric not in REGRESSOR_METRICS: + raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') + + if covariate_index is None: + self.estimator_ = self._get_estimator() + estimator = self.estimator_ + else: + self.permuted_estimator_ = clone(self.estimator_) + estimator = self.permuted_estimator_ + + return self._statistic( + estimator, + X, + y, + covariate_index=covariate_index, + metric=metric, + return_posteriors=return_posteriors, + **metric_kwargs, + ) + + def test( + self, + X, + y, + covariate_index: ArrayLike, + metric: str = "mse", + test_size: float = 0.2, + n_repeats: int = 1000, + return_posteriors: bool = False, + **metric_kwargs, + ): + """Perform hypothesis test using Coleman method. + + X is split into a training/testing split. Optionally, the covariate index + columns are shuffled. + + On the training dataset, two honest forests are trained and then the posterior + is estimated on the testing dataset. One honest forest is trained on the + permuted dataset and the other is trained on the original dataset. + + Finally, resample the posteriors of the two forests to compute the null + distribution of the statistics. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "mse". 
+ test_size : float, optional + Proportion of samples per tree to use for the test set, by default 0.2. + n_repeats : int, optional + Number of times to sample the null distribution, by default 1000. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + + Returns + ------- + stat : float + The test statistic. + pval : float + The p-value of the test statistic. + """ + X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + indices = np.arange(X.shape[0]) + self.test_size_ = int(test_size * X.shape[0]) + # if not self.permute_per_tree: + # # train/test split + # # XXX: could add stratifying by y when y is classification + # indices_train, indices_test = train_test_split( + # indices, test_size=test_size, shuffle=True + # ) + # self.indices_train_ = indices_train + # self.indices_test_ = indices_test + indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) + self.indices_train_ = indices_train + self.indices_test_ = indices_test + + if not hasattr(self, "samples_"): + # first compute the test statistic on the un-permuted data + observe_stat, observe_posteriors, observe_samples = self.statistic( + X, + y, + covariate_index=None, + metric=metric, + return_posteriors=True, + check_input=False, + **metric_kwargs, + ) + else: + observe_samples = self.samples_ + observe_posteriors = self.posterior_final_ + observe_stat = self.stat_ + + # next permute the data + permute_stat, permute_posteriors, permute_samples = self.statistic( + X, + y, + covariate_index=covariate_index, + metric=metric, + return_posteriors=True, + check_input=False, + **metric_kwargs, + ) + + # Note: at this point, both `estimator` and `permuted_estimator_` should + # have been fitted already, so we can now compute on the null by resampling + # the posteriors and computing the test statistic on the resampled posteriors + if self.sample_dataset_per_tree: + 
metric_star, metric_star_pi = _compute_null_distribution_coleman( + y_test=y[observe_samples, :], + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + else: + metric_star, metric_star_pi = _compute_null_distribution_coleman( + y_test=y[self.indices_test_, :], + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + # metric^\pi - metric = observed test statistic, which under the null is normally distributed around 0 + observe_stat = permute_stat - observe_stat + + # metric^\pi_j - metric_j, which is centered at 0 + null_dist = metric_star_pi - metric_star + + # compute pvalue + pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) + + if return_posteriors: + self.observe_posteriors_ = observe_posteriors + self.permute_posteriors_ = permute_posteriors + self.observe_samples_ = observe_samples + self.permute_samples_ = permute_samples + + self.null_dist_ = null_dist + return observe_stat, pvalue From f26825da45b60bc49758fc9a401eafb0e86b0d51 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 18 Sep 2023 15:09:31 -0400 Subject: [PATCH 18/70] Patch up python code Signed-off-by: Adam Li --- benchmarks_nonasv/bench_forestht.py | 53 +- .../test_permutation_forest.ipynb | 2 +- sktree/__init__.py | 2 - sktree/conftest.py | 1 + sktree/meson.build | 2 +- sktree/stats/__init__.py | 2 +- sktree/stats/_might.py | 2 +- sktree/stats/forestht.py | 951 +++--------------- sktree/stats/permutationforest.py | 12 +- sktree/stats/tests/test_forestht.py | 151 ++- sktree/stats/utils.py | 6 +- 11 files changed, 236 insertions(+), 948 deletions(-) diff --git a/benchmarks_nonasv/bench_forestht.py b/benchmarks_nonasv/bench_forestht.py index c18bb9e8b..32bff3e94 100644 --- a/benchmarks_nonasv/bench_forestht.py +++ b/benchmarks_nonasv/bench_forestht.py @@ -87,10 +87,6 @@ def 
linear_model_ancova(sigma_factor=2.0, seed=None): return pvalue_dict -def linear_model_mars(): - pass - - def correlated_logit_model(beta=5.0, seed=None): n_samples = 600 n_estimators = 125 @@ -149,14 +145,43 @@ def random_forest_model(): pass -if __name__ == "__main__": +def evaluate_correlated_logit_model(): + pvalue_dict = defaultdict(list) + rng = np.random.default_rng(seed) + + beta_space = np.hstack((np.linspace(0.01, 2.5, 8), np.linspace(5, 20, 7))) + for beta in beta_space: + for idx in range(5): + new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) + + elements_dict = correlated_logit_model(beta, new_seed) + for key, value in elements_dict.items(): + pvalue_dict[key].append(value) + pvalue_dict["sigma_factor"].append(beta) + + df = pd.DataFrame(pvalue_dict) + + fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True) + axs = axs.flatten() + + for ax, name in zip(axs, ["X1", "X2", "X500"]): + sns.lineplot(data=df, x="sigma_factor", y=name, ax=ax, marker="o") + + ax.axhline([0.05], ls="--", color="red", label="alpha") + ax.set(title=name, ylabel="pvalue", xlabel="SNR (beta)") + ax.legend() + fig.suptitle("Correlated Logit model") + fig.tight_layout() + + +def evaluate_linear_ancova_model(): pvalue_dict = defaultdict(list) rng = np.random.default_rng(seed) j_space = np.linspace(0.005, 2.25, 9) for sigma_factor in j_space: - for idx in range(10): + for idx in range(5): new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) elements_dict = linear_model_ancova(sigma_factor, new_seed) @@ -165,3 +190,19 @@ def random_forest_model(): pvalue_dict["sigma_factor"].append(sigma_factor) df = pd.DataFrame(pvalue_dict) + + fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True) + axs = axs.flatten() + + for ax, name in zip(axs, ["X1", "X2", "X6", "X7"]): + sns.lineplot(data=df, x="sigma_factor", y=name, ax=ax, marker="o") + + ax.axhline([0.05], ls="--", color="red", label="alpha") + ax.set(title=name, 
ylabel="pvalue", xlabel="SNR (10 / x)") + ax.legend() + fig.suptitle("Linear ANCOVA model") + fig.tight_layout() + + +if __name__ == "__main__": + evaluate_linear_ancova_model() diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 12a0e6c0e..bd7e6c43b 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -202,7 +202,7 @@ "\n", " # test for X_2 important\n", " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [1], test_size=test_size, n_repeats=n_repeats, metric=metric\n", + " X.copy(), y.copy(), covariate_index=[1], test_size=test_size, n_repeats=n_repeats, metric=metric\n", " )\n", " pvalue_dict[\"X2\"] = pvalue\n", " print(\"X2: \", pvalue)\n", diff --git a/sktree/__init__.py b/sktree/__init__.py index 8a9e32eb5..c6af80ea0 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -58,7 +58,6 @@ PatchObliqueRandomForestRegressor, ) from .ensemble._honest_forest import HonestForestClassifier - from .stats import ForestHT except ImportError as e: msg = """Error importing scikit-tree: you cannot import scikit-tree while being in scikit-tree source directory; please exit the scikit-tree source @@ -86,5 +85,4 @@ "ExtraTreesClassifier", "ExtraTreesRegressor", "ExtendedIsolationForest", - "ForestHT", ] diff --git a/sktree/conftest.py b/sktree/conftest.py index 037cecae6..c226c3c60 100644 --- a/sktree/conftest.py +++ b/sktree/conftest.py @@ -1,2 +1,3 @@ def pytest_configure(config): + """Set up pytest markers.""" config.addinivalue_line("markers", "slowtest: mark test as slow") diff --git a/sktree/meson.build b/sktree/meson.build index b593f75aa..8608052b8 100644 --- a/sktree/meson.build +++ b/sktree/meson.build @@ -54,7 +54,7 @@ cython_c_args += numpy_nodepr_api python_sources = [ '__init__.py', 'neighbors.py', - # 'conftest.py', + 'conftest.py', ] py3.install_sources( diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index 
95702f9e5..6191b93dd 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,3 +1,3 @@ from ._might import MIGHT, MIGHT_MV -from .forestht import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor, ForestHT +from .forestht import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor from .permutationforest import PermutationForestClassifier, PermutationForestRegressor diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py index 102dc1448..9e57ed921 100644 --- a/sktree/stats/_might.py +++ b/sktree/stats/_might.py @@ -306,7 +306,7 @@ def test_diff(self, x, z, y, reps=1000, workers=1): X_permutedZ = np.hstack((x, permuted_Z)) perm_stat, perm_pos = self.statistic(X_permutedZ, y, return_pos=True) - # Bootsrap sample the posterior from the two forests + # Bootstrap sample the posterior from the two forests null_stats = np.array( Parallel(n_jobs=workers)( [delayed(pos_diff)(observe_pos, perm_pos, y, limit=self.limit) for _ in range(reps)] diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index a761c8660..cbd31fe56 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -13,7 +13,6 @@ ) from sktree._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..ensemble import HonestForestClassifier from .utils import ( METRIC_FUNCTIONS, REGRESSOR_METRICS, @@ -22,201 +21,12 @@ ) -class ForestHT(MetaEstimatorMixin): - """Forest hypothesis testing. - - For example, this allows Mutual information for gigantic hypothesis testing (MIGHT) - via ``metric="mi"``. - - Parameters - ---------- - n_estimators : int, default=100 - The number of trees in the forest. - - criterion : {"gini", "entropy"}, default="gini" - The function to measure the quality of a split. Supported criteria are - "gini" for the Gini impurity and "entropy" for the information gain. - Note: this parameter is tree-specific. 
- - splitter : {"best", "random"}, default="best" - The strategy used to choose the split at each node. Supported - strategies are "best" to choose the best split and "random" to choose - the best random split. - - max_depth : int, default=None - The maximum depth of the tree. If None, then nodes are expanded until - all leaves are pure or until all leaves contain less than - min_samples_split samples. - - min_samples_split : int or float, default=2 - The minimum number of samples required to split an internal node: - - - If int, then consider `min_samples_split` as the minimum number. - - If float, then `min_samples_split` is a fraction and - `ceil(min_samples_split * n_samples)` are the minimum - number of samples for each split. - - min_samples_leaf : int or float, default=1 - The minimum number of samples required to be at a leaf node. - A split point at any depth will only be considered if it leaves at - least ``min_samples_leaf`` training samples in each of the left and - right branches. This may have the effect of smoothing the model, - especially in regression. - - - If int, then consider `min_samples_leaf` as the minimum number. - - If float, then `min_samples_leaf` is a fraction and - `ceil(min_samples_leaf * n_samples)` are the minimum - number of samples for each node. - - min_weight_fraction_leaf : float, default=0.0 - The minimum weighted fraction of the sum total of weights (of all - the input samples) required to be at a leaf node. Samples have - equal weight when sample_weight is not provided. - - max_features : {"sqrt", "log2", None}, int or float, default="sqrt" - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a fraction and - `round(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=sqrt(n_features)`. - - If "sqrt", then `max_features=sqrt(n_features)`. 
- - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - - max_leaf_nodes : int, default=None - Grow trees with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. - - min_impurity_decrease : float, default=0.0 - A node will be split if this split induces a decrease of the impurity - greater than or equal to this value. - - The weighted impurity decrease equation is the following:: - - N_t / N * (impurity - N_t_R / N_t * right_impurity - - N_t_L / N_t * left_impurity) - - where ``N`` is the total number of samples, ``N_t`` is the number of - samples at the current node, ``N_t_L`` is the number of samples in the - left child, and ``N_t_R`` is the number of samples in the right child. - - ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, - if ``sample_weight`` is passed. - - bootstrap : bool, default=True - Whether bootstrap samples are used when building trees. If False, the - whole dataset is used to build each tree. - - oob_score : bool, default=False - Whether to use out-of-bag samples to estimate the generalization score. - Only available if bootstrap=True. - - n_jobs : int, default=None - The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, - :meth:`decision_path` and :meth:`apply` are all parallelized over the - trees. ``None`` means 1 unless in a `joblib.parallel_backend` - context. ``-1`` means using all processors. See :term:`Glossary - ` for more details. 
- - random_state : int, RandomState instance or None, default=None - Controls both the randomness of the bootstrapping of the samples used - when building trees (if ``bootstrap=True``) and the sampling of the - features to consider when looking for the best split at each node - (if ``max_features < n_features``). - See :term:`Glossary ` for details. - - verbose : int, default=0 - Controls the verbosity when fitting and predicting. - - warm_start : bool, default=False - When set to ``True``, reuse the solution of the previous call to fit - and add more estimators to the ensemble, otherwise, just fit a whole - new forest. See :term:`the Glossary `. - - class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ - default=None - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. For - multi-output problems, a list of dicts can be provided in the same - order as the columns of y. - - Note that for multioutput (including multilabel) weights should be - defined for each class of every column in its own dict. For example, - for four-class multilabel classification weights should be - [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of - [{1:1}, {2:5}, {3:1}, {4:1}]. - - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` - - The "balanced_subsample" mode is the same as "balanced" except that - weights are computed based on the bootstrap sample for every tree - grown. - - For multi-output, the weights of each column of y will be multiplied. - - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. - - ccp_alpha : non-negative float, default=0.0 - Complexity parameter used for Minimal Cost-Complexity Pruning. 
The - subtree with the largest cost complexity that is smaller than - ``ccp_alpha`` will be chosen. By default, no pruning is performed. See - :ref:`minimal_cost_complexity_pruning` for details. - - max_samples : int or float, default=None - If bootstrap is True, the number of samples to draw from X - to train each base tree estimator. - - - If None (default), then draw `X.shape[0]` samples. - - If int, then draw `max_samples` samples. - - If float, then draw `max_samples * X.shape[0]` samples. Thus, - `max_samples` should be in the interval `(0.0, 1.0]`. - - honest_prior : {"ignore", "uniform", "empirical"}, default="empirical" - Method for dealing with empty leaves during evaluation of a test - sample. If "ignore", the tree is ignored. If "uniform", the prior tree - posterior is 1/(number of classes). If "empirical", the prior tree - posterior is the relative class frequency in the voting subsample. - If all trees are ignored, the empirical estimate is returned. - - honest_fraction : float, default=0.5 - Fraction of training samples used for estimates in the trees. The - remaining samples will be used to learn the tree structure. A larger - fraction creates shallower trees with lower variance estimates. - - tree_estimator : object, default=None - Type of decision tree classifier to use. By default `None`, which - defaults to :class:`sklearn.tree.DecisionTreeClassifier`. - - Attributes - ---------- - samples_ : ArrayLike of shape (n_samples,) - The indices of the samples used in the final test. - - y_true_final_ : ArrayLike of shape (n_samples_final,) - The true labels of the samples used in the final test. - - posterior_final_ : ArrayLike of shape (n_samples_final,) - The predicted posterior probabilities of the samples used in the final test. - - null_dist_ : ArrayLike of shape (n_repeats,) - The null distribution of the test statistic. 
- """ - +class BaseForestHT(MetaEstimatorMixin): def __init__( self, + estimator=None, n_estimators=100, - criterion="gini", - splitter="best", + criterion="squared_error", max_depth=None, min_samples_split=2, min_samples_leaf=1, @@ -230,41 +40,15 @@ def __init__( random_state=None, verbose=0, warm_start=False, - class_weight=None, ccp_alpha=0.0, max_samples=None, - honest_prior="empirical", - honest_fraction=0.5, - tree_estimator=None, + permute_per_tree=True, + **estimator_kwargs, ): - self.estimator = HonestForestClassifier( - n_estimators=n_estimators, - criterion=criterion, - splitter=splitter, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start, - class_weight=class_weight, - ccp_alpha=ccp_alpha, - max_samples=max_samples, - honest_prior=honest_prior, - honest_fraction=honest_fraction, - tree_estimator=tree_estimator, - ) + self.estimator = estimator self.n_jobs = n_jobs self.n_estimators = n_estimators self.criterion = criterion - self.splitter = splitter self.max_depth = max_depth self.min_samples_split = min_samples_split self.min_samples_leaf = min_samples_leaf @@ -277,91 +61,10 @@ def __init__( self.random_state = random_state self.verbose = verbose self.warm_start = warm_start - self.class_weight = class_weight self.ccp_alpha = ccp_alpha self.max_samples = max_samples - self.honest_prior = honest_prior - self.honest_fraction = honest_fraction - self.tree_estimator = tree_estimator - - def _statistic( - self, - estimator: ForestClassifier, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="auc", - test_size=0.2, - return_posteriors: bool = False, - **metric_kwargs, - ): - """Helper 
function to compute the test statistic.""" - metric_func = METRIC_FUNCTIONS[metric] - rng = np.random.default_rng(self.random_state) - - # first run a dummy fit on just two samples to initialize the - # internal data structure of the forest - if not _is_fitted(estimator): - unique_y = np.unique(y) - X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) - estimator.fit(X_dummy, unique_y) - - # Fit each tree and ompute posteriors with train test splits - n_samples = X.shape[0] - indices = np.arange(n_samples, dtype=int) - posterior_arr = np.zeros((self.n_estimators, n_samples, self.estimator.n_classes_)) - for idx in range(self.n_estimators): - seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32) - indices_train, indices_test = train_test_split( - indices, test_size=test_size, stratify=y, shuffle=True, random_state=seed - ) - tree = estimator.estimators_[idx] - tree_posterior( - tree, - X[indices_train, :], - y[indices_train, :], - covariate_index, - test_size, - seed=seed, - ) - - y_pred = tree.predict_proba(X[indices_test, :]) - - # Fill test set posteriors & set rest NaN - posterior = np.full((y.shape[0], tree.n_classes_), np.nan) - posterior[indices_test, :] = y_pred - posterior_arr[idx, ...] = posterior - - # Average all posteriors - posterior_final = np.nanmean(posterior_arr, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() - y_true_final = y[samples, :] - posterior_final = posterior_final[samples, :] - if metric == "auc": - if posterior_final.shape[1] != 2: - raise ValueError( - "AUC only supports binary classification. " "Please use a different metric." 
- ) - print(posterior_final[:5, :]) - # get posteriors of the positive class - posterior_final = posterior_final[:, 1] - - print("Y true: ", y_true_final) - print("posterior: ", posterior_final) - stat = metric_func(y_true_final, posterior_final, **metric_kwargs) - - if covariate_index is None: - # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) - # arrays of y and predicted posterior - self.samples_ = samples - self.y_true_final_ = y_true_final - self.posterior_final_ = posterior_final - self.stat_ = stat - - if return_posteriors: - return stat, posterior_final, samples - - return stat + self.estimator_kwargs = estimator_kwargs + self.permute_per_tree = permute_per_tree def reset(self): class_attributes = dir(type(self)) @@ -376,9 +79,9 @@ def statistic( X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, - metric="auc", - test_size=0.2, + metric="mse", return_posteriors: bool = False, + check_input: bool = True, **metric_kwargs, ): """Compute the test statistic. @@ -412,14 +115,19 @@ def statistic( equal to ``n_samples`` if all samples are encountered in the test set of at least one tree in the posterior computation. 
""" - X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) + if check_input: + X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + if metric not in REGRESSOR_METRICS: + raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') if covariate_index is None: - estimator = self.estimator + self.estimator_ = self._get_estimator() + estimator = self.estimator_ else: - self.permuted_estimator_ = clone(self.estimator) + self.permuted_estimator_ = clone(self.estimator_) estimator = self.permuted_estimator_ return self._statistic( @@ -428,7 +136,6 @@ def statistic( y, covariate_index=covariate_index, metric=metric, - test_size=test_size, return_posteriors=return_posteriors, **metric_kwargs, ) @@ -438,7 +145,7 @@ def test( X, y, covariate_index: ArrayLike, - metric: str = "auc", + metric: str = "mse", test_size: float = 0.2, n_repeats: int = 1000, return_posteriors: bool = False, @@ -465,7 +172,7 @@ def test( covariate_index : ArrayLike, optional of shape (n_covariates,) The index array of covariates to shuffle, by default None. metric : str, optional - The metric to compute, by default "auc". + The metric to compute, by default "mse". test_size : float, optional Proportion of samples per tree to use for the test set, by default 0.2. 
n_repeats : int, optional @@ -484,6 +191,20 @@ def test( if y.ndim != 2: y = y.reshape(-1, 1) + indices = np.arange(X.shape[0]) + self.test_size_ = int(test_size * X.shape[0]) + # if not self.permute_per_tree: + # # train/test split + # # XXX: could add stratifying by y when y is classification + # indices_train, indices_test = train_test_split( + # indices, test_size=test_size, shuffle=True + # ) + # self.indices_train_ = indices_train + # self.indices_test_ = indices_test + indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) + self.indices_train_ = indices_train + self.indices_test_ = indices_test + if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data observe_stat, observe_posteriors, observe_samples = self.statistic( @@ -491,8 +212,8 @@ def test( y, covariate_index=None, metric=metric, - test_size=test_size, return_posteriors=True, + check_input=False, **metric_kwargs, ) else: @@ -506,97 +227,50 @@ def test( y, covariate_index=covariate_index, metric=metric, - test_size=test_size, return_posteriors=True, + check_input=False, **metric_kwargs, ) # Note: at this point, both `estimator` and `permuted_estimator_` should # have been fitted already, so we can now compute on the null by resampling # the posteriors and computing the test statistic on the resampled posteriors - metric_star, metric_star_pi = _compute_null_distribution_coleman( - X_test=X, - y_test=y, - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, - normal_samples=observe_samples, - perm_samples=permute_samples, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) - print(observe_posteriors) - print(permute_posteriors) - # metric^\pi - metric - observe_stat = permute_stat - observe_stat - - # metric^\pi_j - metric_j - null_dist = metric_star_pi - metric_star - - pval = _pvalue(observe_stat=observe_stat, permuted_stat=null_dist, correction=True) - - if return_posteriors: - 
self.observe_posteriors_ = observe_posteriors - self.permute_posteriors_ = permute_posteriors - self.observe_samples_ = observe_samples - self.permute_samples_ = permute_samples - - self.null_dist_ = null_dist - return observe_stat, pval - - -class BaseForestHT(MetaEstimatorMixin): - def __init__( - self, - estimator=None, - n_estimators=100, - criterion="squared_error", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, - random_state=None, - verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, - permute_per_tree=True, - **estimator_kwargs, - ): - self.estimator = estimator - self.n_jobs = n_jobs - self.n_estimators = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.bootstrap = bootstrap - self.oob_score = oob_score - self.random_state = random_state - self.verbose = verbose - self.warm_start = warm_start - self.ccp_alpha = ccp_alpha - self.max_samples = max_samples - self.estimator_kwargs = estimator_kwargs - self.permute_per_tree = permute_per_tree + if self.sample_dataset_per_tree: + metric_star, metric_star_pi = _compute_null_distribution_coleman( + y_test=y[observe_samples, :], + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + metric=metric, + n_repeats=n_repeats, + seed=self.random_state, + ) + else: + metric_star, metric_star_pi = _compute_null_distribution_coleman( + y_test=y[self.indices_test_, :], + y_pred_proba_normal=observe_posteriors, + y_pred_proba_perm=permute_posteriors, + metric=metric, + n_repeats=n_repeats, + 
seed=self.random_state, + ) + # metric^\pi - metric = observed test statistic, which under the + # null is normally distributed around 0 + observe_stat = permute_stat - observe_stat - def reset(self): - class_attributes = dir(type(self)) - instance_attributes = dir(self) + # metric^\pi_j - metric_j, which is centered at 0 + null_dist = metric_star_pi - metric_star - for attr_name in instance_attributes: - if attr_name.endswith("_") and attr_name not in class_attributes: - delattr(self, attr_name) + # compute pvalue + pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) + + if return_posteriors: + self.observe_posteriors_ = observe_posteriors + self.permute_posteriors_ = permute_posteriors + self.observe_samples_ = observe_samples + self.permute_samples_ = permute_samples + + self.null_dist_ = null_dist + return observe_stat, pvalue class FeatureImportanceForestRegressor(BaseForestHT): @@ -607,7 +281,7 @@ class FeatureImportanceForestRegressor(BaseForestHT): permuted dataset. The dataset is either permuted once, or independently for each tree in the permuted forest. The original test statistic is computed by comparing the metric on both forests ``(metric_forest - metric_perm_forest)``. - + Then the output predictions are randomly sampled to recompute the test statistic ``n_repeats`` times. The p-value is computed as the proportion of times the null test statistic is greater than the original test statistic. 
@@ -922,256 +596,58 @@ def _statistic( shuffle=True, random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), ) - tree: DecisionTreeRegressor = estimator.estimators_[idx] + tree = estimator.estimators_[idx] train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) - posterior_arr[idx, indices_test, :] = y_pred # posterior # Average all posteriors - posterior_final = np.nanmean(posterior_arr, axis=0) - - # Find the row indices with NaN values in any column - nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] - - # Ignore all NaN values (samples not tested) - y_true_final = y[nonnan_indices, :] - posterior_final = posterior_final[nonnan_indices, :] - samples = nonnan_indices - else: - X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] - y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] - - if covariate_index is not None: - # perform permutation of covariates - n_samples_train = X_train.shape[0] - index_arr = rng.choice( - np.arange(n_samples_train, dtype=int), - size=(n_samples_train, 1), - replace=False, - shuffle=True, - ) - X_train[:, covariate_index] = X_train[index_arr, covariate_index] - - estimator.fit(X_train, y_train) - y_pred = estimator.predict(X_test) - - # set variables to compute metric - samples = self.indices_test_ - y_true_final = y_test - posterior_final = y_pred - - stat = metric_func(y_true_final, posterior_final, **metric_kwargs) - if covariate_index is None: - # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) - # arrays of y and predicted posterior - self.samples_ = samples - self.y_true_final_ = y_true_final - self.posterior_final_ = posterior_final - self.stat_ = stat - - if return_posteriors: - return stat, posterior_final, samples - - return stat - - def statistic( - self, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mse", 
- return_posteriors: bool = False, - check_input: bool = True, - **metric_kwargs, - ): - """Compute the test statistic. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "auc". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - - Returns - ------- - stat : float - The test statistic. - posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional - If ``return_posteriors`` is True, then the posterior probabilities of the - samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` - if all samples are encountered in the test set of at least one tree in the - posterior computation. - samples : ArrayLike of shape (n_samples_final,), optional - The indices of the samples used in the final test. ``n_samples_final`` is - equal to ``n_samples`` if all samples are encountered in the test set of at - least one tree in the posterior computation. 
- """ - if check_input: - X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) - - if metric not in REGRESSOR_METRICS: - raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') - - if covariate_index is None: - self.estimator_ = self._get_estimator() - estimator = self.estimator_ - else: - self.permuted_estimator_ = clone(self.estimator_) - estimator = self.permuted_estimator_ - - return self._statistic( - estimator, - X, - y, - covariate_index=covariate_index, - metric=metric, - return_posteriors=return_posteriors, - **metric_kwargs, - ) - - def test( - self, - X, - y, - covariate_index: ArrayLike, - metric: str = "mse", - test_size: float = 0.2, - n_repeats: int = 1000, - return_posteriors: bool = False, - **metric_kwargs, - ): - """Perform hypothesis test using Coleman method. - - X is split into a training/testing split. Optionally, the covariate index - columns are shuffled. - - On the training dataset, two honest forests are trained and then the posterior - is estimated on the testing dataset. One honest forest is trained on the - permuted dataset and the other is trained on the original dataset. - - Finally, resample the posteriors of the two forests to compute the null - distribution of the statistics. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "mse". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. - n_repeats : int, optional - Number of times to sample the null distribution, by default 1000. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. 
- - Returns - ------- - stat : float - The test statistic. - pval : float - The p-value of the test statistic. - """ - X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) - - indices = np.arange(X.shape[0]) - self.test_size_ = int(test_size * X.shape[0]) - # if not self.permute_per_tree: - # # train/test split - # # XXX: could add stratifying by y when y is classification - # indices_train, indices_test = train_test_split( - # indices, test_size=test_size, shuffle=True - # ) - # self.indices_train_ = indices_train - # self.indices_test_ = indices_test - indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) - self.indices_train_ = indices_train - self.indices_test_ = indices_test - - if not hasattr(self, "samples_"): - # first compute the test statistic on the un-permuted data - observe_stat, observe_posteriors, observe_samples = self.statistic( - X, - y, - covariate_index=None, - metric=metric, - return_posteriors=True, - check_input=False, - **metric_kwargs, - ) - else: - observe_samples = self.samples_ - observe_posteriors = self.posterior_final_ - observe_stat = self.stat_ - - # next permute the data - permute_stat, permute_posteriors, permute_samples = self.statistic( - X, - y, - covariate_index=covariate_index, - metric=metric, - return_posteriors=True, - check_input=False, - **metric_kwargs, - ) - - # Note: at this point, both `estimator` and `permuted_estimator_` should - # have been fitted already, so we can now compute on the null by resampling - # the posteriors and computing the test statistic on the resampled posteriors - if self.sample_dataset_per_tree: - metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[observe_samples, :], - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) + posterior_final = np.nanmean(posterior_arr, axis=0) + + # 
Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + + # Ignore all NaN values (samples not tested) + y_true_final = y[nonnan_indices, :] + posterior_final = posterior_final[nonnan_indices, :] + samples = nonnan_indices else: - metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[self.indices_test_, :], - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) - # metric^\pi - metric = observed test statistic, which under the null is normally distributed around 0 - observe_stat = permute_stat - observe_stat + X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] + y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] - # metric^\pi_j - metric_j, which is centered at 0 - null_dist = metric_star_pi - metric_star + if covariate_index is not None: + # perform permutation of covariates + n_samples_train = X_train.shape[0] + index_arr = rng.choice( + np.arange(n_samples_train, dtype=int), + size=(n_samples_train, 1), + replace=False, + shuffle=True, + ) + X_train[:, covariate_index] = X_train[index_arr, covariate_index] - # compute pvalue - pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) + estimator.fit(X_train, y_train) + y_pred = estimator.predict(X_test) + + # set variables to compute metric + samples = self.indices_test_ + y_true_final = y_test + posterior_final = y_pred + + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) + if covariate_index is None: + # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) + # arrays of y and predicted posterior + self.samples_ = samples + self.y_true_final_ = y_true_final + self.posterior_final_ = posterior_final + self.stat_ = stat if return_posteriors: - self.observe_posteriors_ = observe_posteriors - self.permute_posteriors_ = permute_posteriors - 
self.observe_samples_ = observe_samples - self.permute_samples_ = permute_samples + return stat, posterior_final, samples - self.null_dist_ = null_dist - return observe_stat, pvalue + return stat class FeatureImportanceForestClassifier(BaseForestHT): @@ -1182,7 +658,7 @@ class FeatureImportanceForestClassifier(BaseForestHT): permuted dataset. The dataset is either permuted once, or independently for each tree in the permuted forest. The original test statistic is computed by comparing the metric on both forests ``(metric_forest - metric_perm_forest)``. - + Then the output predictions are randomly sampled to recompute the test statistic ``n_repeats`` times. The p-value is computed as the proportion of times the null test statistic is greater than the original test statistic. @@ -1497,7 +973,7 @@ def _statistic( shuffle=True, random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), ) - tree: DecisionTreeClassifier = estimator.estimators_[idx] + tree = estimator.estimators_[idx] train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) @@ -1550,200 +1026,3 @@ def _statistic( return stat, posterior_final, samples return stat - - def statistic( - self, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mse", - return_posteriors: bool = False, - check_input: bool = True, - **metric_kwargs, - ): - """Compute the test statistic. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "auc". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. 
- return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - - Returns - ------- - stat : float - The test statistic. - posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional - If ``return_posteriors`` is True, then the posterior probabilities of the - samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` - if all samples are encountered in the test set of at least one tree in the - posterior computation. - samples : ArrayLike of shape (n_samples_final,), optional - The indices of the samples used in the final test. ``n_samples_final`` is - equal to ``n_samples`` if all samples are encountered in the test set of at - least one tree in the posterior computation. - """ - if check_input: - X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) - - if metric not in REGRESSOR_METRICS: - raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') - - if covariate_index is None: - self.estimator_ = self._get_estimator() - estimator = self.estimator_ - else: - self.permuted_estimator_ = clone(self.estimator_) - estimator = self.permuted_estimator_ - - return self._statistic( - estimator, - X, - y, - covariate_index=covariate_index, - metric=metric, - return_posteriors=return_posteriors, - **metric_kwargs, - ) - - def test( - self, - X, - y, - covariate_index: ArrayLike, - metric: str = "mse", - test_size: float = 0.2, - n_repeats: int = 1000, - return_posteriors: bool = False, - **metric_kwargs, - ): - """Perform hypothesis test using Coleman method. - - X is split into a training/testing split. Optionally, the covariate index - columns are shuffled. - - On the training dataset, two honest forests are trained and then the posterior - is estimated on the testing dataset. One honest forest is trained on the - permuted dataset and the other is trained on the original dataset. 
- - Finally, resample the posteriors of the two forests to compute the null - distribution of the statistics. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "mse". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. - n_repeats : int, optional - Number of times to sample the null distribution, by default 1000. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - - Returns - ------- - stat : float - The test statistic. - pval : float - The p-value of the test statistic. - """ - X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) - - indices = np.arange(X.shape[0]) - self.test_size_ = int(test_size * X.shape[0]) - # if not self.permute_per_tree: - # # train/test split - # # XXX: could add stratifying by y when y is classification - # indices_train, indices_test = train_test_split( - # indices, test_size=test_size, shuffle=True - # ) - # self.indices_train_ = indices_train - # self.indices_test_ = indices_test - indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) - self.indices_train_ = indices_train - self.indices_test_ = indices_test - - if not hasattr(self, "samples_"): - # first compute the test statistic on the un-permuted data - observe_stat, observe_posteriors, observe_samples = self.statistic( - X, - y, - covariate_index=None, - metric=metric, - return_posteriors=True, - check_input=False, - **metric_kwargs, - ) - else: - observe_samples = self.samples_ - observe_posteriors = self.posterior_final_ - observe_stat = self.stat_ - - # next permute the data - permute_stat, 
permute_posteriors, permute_samples = self.statistic( - X, - y, - covariate_index=covariate_index, - metric=metric, - return_posteriors=True, - check_input=False, - **metric_kwargs, - ) - - # Note: at this point, both `estimator` and `permuted_estimator_` should - # have been fitted already, so we can now compute on the null by resampling - # the posteriors and computing the test statistic on the resampled posteriors - if self.sample_dataset_per_tree: - metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[observe_samples, :], - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) - else: - metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[self.indices_test_, :], - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, - metric=metric, - n_repeats=n_repeats, - seed=self.random_state, - ) - # metric^\pi - metric = observed test statistic, which under the null is normally distributed around 0 - observe_stat = permute_stat - observe_stat - - # metric^\pi_j - metric_j, which is centered at 0 - null_dist = metric_star_pi - metric_star - - # compute pvalue - pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) - - if return_posteriors: - self.observe_posteriors_ = observe_posteriors - self.permute_posteriors_ = permute_posteriors - self.observe_samples_ = observe_samples - self.permute_samples_ = permute_samples - - self.null_dist_ = null_dist - return observe_stat, pvalue diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index fb9c6a98b..55b79833b 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -243,7 +243,7 @@ def test( if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data - observe_stat, observe_posteriors, observe_samples = self.statistic( + observe_stat, _, _ = 
self.statistic( X, y, covariate_index=None, @@ -253,8 +253,8 @@ def test( **metric_kwargs, ) else: - observe_samples = self.samples_ - observe_posteriors = self.posterior_final_ + # observe_samples = self.samples_ + # observe_posteriors = self.posterior_final_ observe_stat = self.stat_ # compute null distribution of the test statistic @@ -572,7 +572,7 @@ class PermutationForestClassifier(BasePermutationForest): non-permuted data. .. note:: This does not allow testing on the posteriors. - + Parameters ---------- estimator : object, default=None @@ -830,6 +830,6 @@ def _get_estimator(self): max_samples=self.max_samples, **self.estimator_kwargs, ) - elif not isinstance(self.estimator_, ForestRegressor): - raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") + elif not isinstance(self.estimator_, ForestClassifier): + raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator_)}") return estimator_ diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 93e2138f8..d02f09fa8 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -5,11 +5,11 @@ from sktree._lib.sklearn.tree import DecisionTreeClassifier from sktree.stats import ( + FeatureImportanceForestClassifier, FeatureImportanceForestRegressor, PermutationForestClassifier, PermutationForestRegressor, ) -from sktree.stats.forestht import ForestHT from sktree.tree import ObliqueDecisionTreeClassifier # load the iris dataset @@ -39,13 +39,36 @@ def test_forestht_proper_attributes(): @pytest.mark.slowtest @pytest.mark.parametrize( - "hypotester", + "hypotester, model_kwargs, n_samples, n_repeats, test_size", [ - # PermutationForestRegressor, - FeatureImportanceForestRegressor + [ + PermutationForestRegressor, + { + "max_features": 1.0, + "random_state": seed, + "n_estimators": 50, + "n_jobs": -1, + }, + 550, + 50, + 0.1, + ], + [ + FeatureImportanceForestRegressor, + { + "max_features": 
1.0, + "random_state": seed, + "n_estimators": 125, + "permute_per_tree": True, + "n_jobs": -1, + }, + 600, + 200, + 0.1, + ], ], ) -def test_linear_model(hypotester): +def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size): r"""Test hypothesis testing forests using MSE from linear model simulation. See https://arxiv.org/pdf/1904.07830.pdf Figure 1. @@ -54,10 +77,6 @@ def test_linear_model(hypotester): """ beta = 10.0 sigma = 0.5 - n_samples = 550 - n_estimators = 50 - test_size = 0.1 - n_repeats = 50 metric = "mse" rng = np.random.default_rng(seed) @@ -77,12 +96,7 @@ def test_linear_model(hypotester): # compute final y of (n_samples,) y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon - est = hypotester( - max_features=1.0, - random_state=seed, - n_estimators=n_estimators, - n_jobs=-1, - ) + est = hypotester(**model_kwargs) # test for X_1 stat, pvalue = est.test(X, y, [0], metric=metric, test_size=test_size, n_repeats=n_repeats) @@ -106,76 +120,37 @@ def test_linear_model(hypotester): @pytest.mark.slowtest @pytest.mark.parametrize( - "hypotester", + "hypotester, model_kwargs, n_samples, n_repeats, test_size", [ - # PermutationForestRegressor, - FeatureImportanceForestRegressor + [ + PermutationForestClassifier, + { + "max_features": "sqrt", + "random_state": seed, + "n_estimators": 75, + "n_jobs": -1, + }, + 500, + 50, + 1.0 / 6, + ], + [ + FeatureImportanceForestClassifier, + { + "max_features": "sqrt", + "random_state": seed, + "n_estimators": 125, + "permute_per_tree": True, + "sample_dataset_per_tree": True, + "n_jobs": -1, + }, + 500, + 100, + 1.0 / 6, + ], ], ) -def test_linear_model_withcoleman(hypotester): - r"""Test hypothesis testing forests using MSE from linear model simulation. - - See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
- - Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon - """ - beta = 10.0 - sigma = 0.5 - n_samples = 600 - n_estimators = 125 - test_size = 0.1 - n_repeats = 200 - permute_per_tree = True - metric = "mse" - - rng = np.random.default_rng(seed) - - # sample covariates - X_15 = rng.uniform(0, 1, size=(n_samples, 5)) - X_610 = np.zeros((n_samples, 5)) - for idx in range(5): - X_610[:, idx] = np.argwhere( - rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) - )[:, 1] - X = np.concatenate((X_15, X_610), axis=1) - assert X.shape == (n_samples, 10) - - # sample noise - epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma) - - # compute final y of (n_samples,) - y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon - est = hypotester( - max_features=1.0, - random_state=seed, - n_estimators=n_estimators, - permute_per_tree=permute_per_tree, - n_jobs=-1, - ) - - # test for X_1 - stat, pvalue = est.test(X, y, [0], metric=metric, test_size=test_size, n_repeats=n_repeats) - print("X1: ", pvalue) - assert pvalue < 0.05, f"pvalue: {pvalue}" - - # test for X_6 - stat, pvalue = est.test(X, y, [5], metric=metric, test_size=test_size, n_repeats=n_repeats) - print("X6: ", pvalue) - assert pvalue < 0.05, f"pvalue: {pvalue}" - - # test for a few unimportant other X - for covariate_index in [1, 6]: - # test for X_2, X_7 - stat, pvalue = est.test( - X, y, [covariate_index], metric=metric, test_size=test_size, n_repeats=n_repeats - ) - print("X2/7: ", pvalue) - assert pvalue > 0.05, f"pvalue: {pvalue}" - - -@pytest.mark.slowtest -@pytest.mark.parametrize("hypotester", [PermutationForestClassifier]) -def test_correlated_logit_model(hypotester): +def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, test_size): r"""Test MIGHT using MSE from linear model simulation. See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
@@ -183,13 +158,7 @@ def test_correlated_logit_model(hypotester): P(Y = 1 | X) = expit(beta * \\sum_{j=2}^5 X_j) """ beta = 5.0 - n_samples = 600 - n_estimators = 50 - n_jobs = -1 - max_features = "sqrt" - test_size = 1.0 / 6 metric = "mse" - n_repeats = 50 n = 200 # Number of time steps ar_coefficient = 0.0015 @@ -216,9 +185,7 @@ def test_correlated_logit_model(hypotester): assert y_proba.shape == (n_samples,) y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) - est = hypotester( - max_features=max_features, random_state=seed, n_estimators=n_estimators, n_jobs=n_jobs - ) + est = hypotester(**model_kwargs) # test for X_2 important stat, pvalue = est.test( @@ -256,7 +223,7 @@ def test_correlated_logit_model(hypotester): @pytest.mark.parametrize("limit", [0.05, 0.1]) def test_iris_pauc(criterion, max_features, honest_prior, estimator, limit): # Check consistency on dataset iris. - clf = ForestHT( + clf = FeatureImportanceForestClassifier( criterion=criterion, random_state=0, max_features=max_features, diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index c0a988520..cf725f2a2 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -107,7 +107,6 @@ def _compute_null_distribution_perm( train_index_arr = np.arange(n_samples_train, dtype=int).reshape(-1, 1) test_index_arr = np.arange(n_samples_test, dtype=int).reshape(-1, 1) - X = np.concatenate((X_train, X_test), axis=0) null_metrics = np.zeros((n_repeats,)) for idx in range(n_repeats): @@ -181,7 +180,10 @@ def _compute_null_distribution_coleman( all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0) n_samples_test = len(y_test) - assert len(all_y_pred) == 2 * n_samples_test + if len(all_y_pred) != 2 * n_samples_test: + raise RuntimeError( + "The number of samples in `all_y_pred` is not equal to 2 * n_samples_test" + ) # create two stacked index arrays of y_test resulting in [1, ..., N, 1, ..., N] y_test_ind_arr = np.hstack( From 
168935c995273581507a2dd536b12487b676655a Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 18 Sep 2023 15:12:34 -0400 Subject: [PATCH 19/70] Add documentation Signed-off-by: Adam Li --- doc/api.rst | 14 ++++++++++++++ doc/whats_new/v0.2.rst | 2 ++ 2 files changed, 16 insertions(+) diff --git a/doc/api.rst b/doc/api.rst index 59a58370f..cb722bdf0 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -125,6 +125,20 @@ an API-like interface similar to :class:`~sklearn.neighbors.NearestNeighbors`. NearestNeighborsMetaEstimator +Statistical Hypothesis Testing +------------------------------ +We provide an API for performing statistical hypothesis testing using Decision +tree models. + +.. currentmodule:: sktree.stats +.. autosummary:: + :toctree: generated/ + + FeatureImportanceForestRegressor + FeatureImportanceForestClassifier + PermutationImportanceForestRegressor + PermutationImportanceForestClassifier + Experimental Functionality -------------------------- diff --git a/doc/whats_new/v0.2.rst b/doc/whats_new/v0.2.rst index 37848d99f..97c5d501f 100644 --- a/doc/whats_new/v0.2.rst +++ b/doc/whats_new/v0.2.rst @@ -32,6 +32,7 @@ Changelog - |API| Allow ``sqrt`` and ``log2`` keywords to be used for ``min_samples_split`` parameter in unsupervised forests, by `Adam Li`_ (:pr:`114`) - |Feature| Implement extended isolation forest, by `Adam Li`_ (:pr:`101`) - |Feature| Implementation of StreamDecisionForest, by `Haoyin Xu`_ and `Adam Li`_ (:pr:`116`) +- |Feature| Implementation of Permutation forests and a feature importance testing forest, by `Haoyin Xu`_, `Adam Li`_, `Sambit Panda` (:pr:`125`) Code and Documentation Contributors ----------------------------------- @@ -42,3 +43,4 @@ the project since version inception, including: * `Adam Li`_ * `SUKI-O`_ * `Haoyin Xu`_ +* `Sambit Panda`_ From 01f20853ac8c3fa437d32cfed8f018543f3dcdb2 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 10:53:16 -0400 Subject: [PATCH 20/70] Adding posterior Signed-off-by: Adam Li --- 
.../test_permutation_forest.ipynb | 30 +- sktree/stats/__init__.py | 1 - sktree/stats/_might.py | 319 ------------------ sktree/stats/forestht.py | 240 ++++++++----- sktree/stats/meson.build | 1 - sktree/stats/permutationforest.py | 12 + sktree/stats/tests/test_forestht.py | 103 +++--- sktree/stats/tests/test_might.py | 51 --- sktree/stats/utils.py | 24 +- test_requirements.txt | 3 +- 10 files changed, 258 insertions(+), 526 deletions(-) delete mode 100644 sktree/stats/_might.py delete mode 100644 sktree/stats/tests/test_might.py diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index bd7e6c43b..0fb5add90 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,10 +2,30 @@ "cells": [ { "cell_type": "code", - "execution_count": 15, + "execution_count": 1, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: '/Users/adam2392/Documents/scikit-tree/sktree/stats/_might.py'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mseaborn\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msns\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecial\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m expit\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msktree\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstats\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m 
(\n\u001b[1;32m 10\u001b[0m FeatureImportanceForestClassifier,\n\u001b[1;32m 11\u001b[0m FeatureImportanceForestRegressor,\n\u001b[1;32m 12\u001b[0m PermutationForestRegressor,\n\u001b[1;32m 13\u001b[0m )\n\u001b[1;32m 15\u001b[0m seed \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m12345\u001b[39m\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/__init__.py:39\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;66;03m# We are not importing the rest of scikit-tree during the build\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# process, as it may not be compiled yet\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 39\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _lib, tree, ensemble, experimental, stats\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_lib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mensemble\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_forest\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 41\u001b[0m RandomForestClassifier,\n\u001b[1;32m 42\u001b[0m RandomForestRegressor,\n\u001b[1;32m 43\u001b[0m ExtraTreesClassifier,\n\u001b[1;32m 44\u001b[0m ExtraTreesRegressor,\n\u001b[1;32m 45\u001b[0m )\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mneighbors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m NearestNeighborsMetaEstimator\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_might\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MIGHT, MIGHT_MV\n\u001b[1;32m 
2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mforestht\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FeatureImportanceForestClassifier, FeatureImportanceForestRegressor\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpermutationforest\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PermutationForestClassifier, PermutationForestRegressor\n", + "File \u001b[0;32m:1007\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", + "File \u001b[0;32m:986\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", + "File \u001b[0;32m:680\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", + "File \u001b[0;32m:846\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", + "File \u001b[0;32m:982\u001b[0m, in \u001b[0;36mget_code\u001b[0;34m(self, fullname)\u001b[0m\n", + "File \u001b[0;32m:1039\u001b[0m, in \u001b[0;36mget_data\u001b[0;34m(self, path)\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/Users/adam2392/Documents/scikit-tree/sktree/stats/_might.py'" + ] + } + ], "source": [ "from collections import defaultdict\n", "\n", @@ -26,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", "metadata": {}, "outputs": [], @@ -37,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -139,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index 6191b93dd..b01038036 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,3 +1,2 @@ -from 
._might import MIGHT, MIGHT_MV from .forestht import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor from .permutationforest import PermutationForestClassifier, PermutationForestRegressor diff --git a/sktree/stats/_might.py b/sktree/stats/_might.py deleted file mode 100644 index 9e57ed921..000000000 --- a/sktree/stats/_might.py +++ /dev/null @@ -1,319 +0,0 @@ -import numpy as np -from joblib import Parallel, delayed -from scipy.stats import entropy -from sklearn.metrics import roc_auc_score -from sklearn.model_selection import train_test_split - -from ..ensemble import HonestForestClassifier - - -def auc_calibrator(tree, X, y, test_size=0.2, permute_y=False): - indices = np.arange(X.shape[0]) - X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split( - X, y, indices, test_size=test_size - ) - - # individual tree permutation of y labels - if permute_y: - y_train = np.random.permutation(y_train) - - tree.fit(X_train, y_train) - y_pred = tree.predict_proba(X_test)[:, 1] - - # Fill test set posteriors & set rest NaN - posterior = np.full(y.shape, np.nan) - posterior[indices_test] = y_pred - - return posterior - - -def perm_stat(clf, x, z, y, random_state=None): - if z is not None: - permuted_Z = np.random.permutation(z) - X_permutedZ = np.hstack((x, permuted_Z)) - else: - X_permutedZ = np.random.permutation(x) - - perm_stat = clf.statistic(X_permutedZ, y) - return perm_stat - - -def perm_half(clf, z, y, x_pos): - permuted_Z = np.random.permutation(z) - perm_stat, perm_pos = clf.statistic(permuted_Z, y, return_pos=True) - null_pos = forest_pos(x_pos + perm_pos, y) - null_stat = roc_auc_score(null_pos[:, 0], null_pos[:, 1], max_fpr=clf.limit) - - return null_stat - - -def pos_diff(observe_pos, perm_pos, y, limit): - total_pos = np.random.shuffle(np.concatenate((observe_pos, perm_pos))) - - half_ind = len(total_pos) * 0.5 - half_pos = total_pos[:half_ind] - end_pos = total_pos[half_ind:] - - half_pos_final = 
forest_pos(half_pos, y) - half_stat = roc_auc_score(half_pos_final[:, 0], half_pos_final[:, 1], max_fpr=limit) - - end_pos_final = forest_pos(end_pos, y) - end_stat = roc_auc_score(end_pos_final[:, 0], end_pos_final[:, 1], max_fpr=limit) - - return abs(half_stat - end_stat) - - -def forest_pos(posterior, y): - # Average all posteriors - posterior_final = np.nanmean(posterior, axis=0) - - # Ignore all NaN values (samples not tested) - true_final = y.ravel()[~np.isnan(posterior_final)].reshape(-1, 1) - posterior_final = posterior_final[~np.isnan(posterior_final)].reshape(-1, 1) - - return np.hstack((true_final, posterior_final)) - - -class MIGHT: - def __init__( - self, - n_estimators=500, - criterion="gini", - splitter="best", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, - random_state=None, - verbose=0, - warm_start=False, - class_weight=None, - ccp_alpha=0.0, - max_samples=None, - honest_prior="empirical", - honest_fraction=0.5, - tree_estimator=None, - limit=0.05, - ): - self.clf = HonestForestClassifier( - n_estimators=n_estimators, - criterion=criterion, - splitter=splitter, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start, - class_weight=class_weight, - ccp_alpha=ccp_alpha, - max_samples=max_samples, - honest_prior=honest_prior, - honest_fraction=honest_fraction, - tree_estimator=tree_estimator, - ) - self.limit = limit - - def statistic( - self, - x, - y, - stat="AUC", - workers=1, - test_size=0.2, - initial=True, - return_pos=False, - 
permute_y=False, - ): - # Initialize trees - if initial: - self.clf.fit(x[0:2], y.ravel()[0:2]) - - # Compute posteriors with train test splits - posterior = Parallel(n_jobs=workers)( - delayed(auc_calibrator)(tree, x, y.ravel(), test_size, permute_y) - for tree in (self.clf.estimators_) - ) - - posterior_final = forest_pos(posterior, y) - - if stat == "AUC": - self.stat = roc_auc_score( - posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit - ) - elif stat == "MI": - class_zero = (1 - posterior_final[:, 1]).reshape(-1, 1) - full_class = np.hstack((class_zero, posterior_final[:, 1].reshape(-1, 1))) - H_YX = np.mean(entropy(full_class, base=np.exp(1), axis=1)) - _, counts = np.unique(posterior_final[:, 0], return_counts=True) - H_Y = entropy(counts, base=np.exp(1)) - self.stat = max(H_Y - H_YX, 0) - - if return_pos: - return self.stat, posterior - - return self.stat - - def test(self, x, y, reps=1000, workers=1, random_state=None): - observe_stat = self.statistic(x, y) - - null_dist = np.array( - Parallel(n_jobs=workers)([delayed(perm_stat)(self, x, None, y) for _ in range(reps)]) - ) - pval = (1 + (null_dist >= observe_stat).sum()) / (1 + reps) - - return observe_stat, null_dist, pval - - -class MIGHT_MV: - def __init__( - self, - n_estimators=500, - criterion="gini", - splitter="best", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, - random_state=None, - verbose=0, - warm_start=False, - class_weight=None, - ccp_alpha=0.0, - max_samples=None, - honest_prior="empirical", - honest_fraction=0.5, - tree_estimator=None, - limit=0.05, - ): - self.clf = HonestForestClassifier( - n_estimators=n_estimators, - criterion=criterion, - splitter=splitter, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - 
min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start, - class_weight=class_weight, - ccp_alpha=ccp_alpha, - max_samples=max_samples, - honest_prior=honest_prior, - honest_fraction=honest_fraction, - tree_estimator=tree_estimator, - ) - self.limit = limit - - def statistic( - self, - x, - y, - stat="AUC", - workers=1, - test_size=0.2, - initial=True, - return_pos=False, - permute_y=False, - ): - # Initialize trees - if initial: - self.clf.fit(x[0:2], y.ravel()[0:2]) - - # Compute posteriors with train test splits - posterior = Parallel(n_jobs=workers)( - delayed(auc_calibrator)(tree, x, y.ravel(), test_size, permute_y) - for tree in (self.clf.estimators_) - ) - - posterior_final = forest_pos(posterior, y) - - if stat == "AUC": - self.stat = roc_auc_score( - posterior_final[:, 0], posterior_final[:, 1], max_fpr=self.limit - ) - elif stat == "MI": - H_YX = np.mean(entropy(posterior_final[:, 1], base=np.exp(1))) - _, counts = np.unique(posterior_final[:, 0], return_counts=True) - H_Y = entropy(counts, base=np.exp(1)) - self.stat = max(H_Y - H_YX, 0) - - if return_pos: - return self.stat, posterior - - return self.stat - - def test(self, x, z, y, reps=1000, workers=1, random_state=None): - XZ = np.hstack((x, z)) - observe_stat = self.statistic(XZ, y) - - null_dist = np.array( - Parallel(n_jobs=workers)([delayed(perm_stat)(self, x, z, y) for _ in range(reps)]) - ) - pval = (1 + (null_dist >= observe_stat).sum()) / (1 + reps) - - return observe_stat, null_dist, pval - - def test_twin(self, x, z, y, reps=1000, workers=1, random_state=None): - x_stat, x_pos = self.statistic(x, y, return_pos=True) - - # TODO: determine whether we need the forest - - z_stat, z_pos = self.statistic(z, y, return_pos=True) - - observe_pos = forest_pos(x_pos 
+ z_pos, y) - observe_stat = roc_auc_score(observe_pos[:, 0], observe_pos[:, 1], max_fpr=self.limit) - - null_dist = np.array( - Parallel(n_jobs=workers)([delayed(perm_half)(self, z, y, x_pos) for _ in range(reps)]) - ) - pval = (1 + (null_dist >= observe_stat).sum()) / (1 + reps) - - return observe_stat, null_dist, pval - - def test_diff(self, x, z, y, reps=1000, workers=1): - XZ = np.hstack((x, z)) - observe_stat, observe_pos = self.statistic(XZ, y, return_pos=True) - - # Compute statistic for permuted sets - permuted_Z = np.random.permutation(z) - X_permutedZ = np.hstack((x, permuted_Z)) - perm_stat, perm_pos = self.statistic(X_permutedZ, y, return_pos=True) - - # Bootstrap sample the posterior from the two forests - null_stats = np.array( - Parallel(n_jobs=workers)( - [delayed(pos_diff)(observe_pos, perm_pos, y, limit=self.limit) for _ in range(reps)] - ) - ) - - stat = observe_stat - perm_stat - - pval = (1 + (null_stats >= stat).sum()) / (1 + reps) - return stat, null_stats, pval diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index cbd31fe56..8c329dbae 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1,7 +1,10 @@ import numpy as np from numpy.typing import ArrayLike -from sklearn.base import MetaEstimatorMixin, clone +from sklearn.base import MetaEstimatorMixin, clone, is_classifier +from sklearn.ensemble._forest import ForestClassifier as sklearnForestClassifier +from sklearn.ensemble._forest import ForestRegressor as sklearnForestRegressor from sklearn.model_selection import train_test_split +from sklearn.utils.multiclass import type_of_target from sklearn.utils.validation import _is_fitted, check_X_y from sktree._lib.sklearn.ensemble._forest import ( @@ -15,6 +18,8 @@ from .utils import ( METRIC_FUNCTIONS, + POSITIVE_METRICS, + POSTERIOR_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, train_tree, @@ -43,7 +48,7 @@ def __init__( ccp_alpha=0.0, max_samples=None, permute_per_tree=True, - 
**estimator_kwargs, + sample_dataset_per_tree=True, ): self.estimator = estimator self.n_jobs = n_jobs @@ -63,8 +68,8 @@ def __init__( self.warm_start = warm_start self.ccp_alpha = ccp_alpha self.max_samples = max_samples - self.estimator_kwargs = estimator_kwargs self.permute_per_tree = permute_per_tree + self.sample_dataset_per_tree = sample_dataset_per_tree def reset(self): class_attributes = dir(type(self)) @@ -120,9 +125,6 @@ def statistic( if y.ndim != 2: y = y.reshape(-1, 1) - if metric not in REGRESSOR_METRICS: - raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') - if covariate_index is None: self.estimator_ = self._get_estimator() estimator = self.estimator_ @@ -130,6 +132,9 @@ def statistic( self.permuted_estimator_ = clone(self.estimator_) estimator = self.permuted_estimator_ + if not is_classifier(self.estimator_) and metric not in REGRESSOR_METRICS: + raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') + return self._statistic( estimator, X, @@ -193,14 +198,6 @@ def test( indices = np.arange(X.shape[0]) self.test_size_ = int(test_size * X.shape[0]) - # if not self.permute_per_tree: - # # train/test split - # # XXX: could add stratifying by y when y is classification - # indices_train, indices_test = train_test_split( - # indices, test_size=test_size, shuffle=True - # ) - # self.indices_train_ = indices_train - # self.indices_test_ = indices_test indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) self.indices_train_ = indices_train self.indices_test_ = indices_test @@ -245,8 +242,13 @@ def test( seed=self.random_state, ) else: + if self.permute_per_tree: + y_test = y + else: + y_test = y[self.indices_test_, :] + print(y.shape, observe_posteriors.shape, permute_posteriors.shape) metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[self.indices_test_, :], + y_test=y_test, y_pred_proba_normal=observe_posteriors, y_pred_proba_perm=permute_posteriors, 
metric=metric, @@ -261,7 +263,10 @@ def test( null_dist = metric_star_pi - metric_star # compute pvalue - pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) + if metric in POSITIVE_METRICS: + pvalue = (1 + (null_dist <= observe_stat).sum()) / (1 + n_repeats) + else: + pvalue = (1 + (null_dist >= observe_stat).sum()) / (1 + n_repeats) if return_posteriors: self.observe_posteriors_ = observe_posteriors @@ -276,6 +281,8 @@ def test( class FeatureImportanceForestRegressor(BaseForestHT): """Forest hypothesis testing with continuous `y` variable. + Implements the algorithm described in :footcite:`coleman2022scalable`. + The dataset is split into a training and testing dataset initially. Then there are two forests that are trained: one on the original dataset, and one on the permuted dataset. The dataset is either permuted once, or independently for @@ -443,6 +450,12 @@ class FeatureImportanceForestRegressor(BaseForestHT): - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0.0, 1.0]`. + permute_per_tree : bool, default=True + Whether to permute the covariate index per tree or per forest. + + sample_dataset_per_tree : bool, default=False + Whether to sample the dataset per tree or per forest. + Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -456,6 +469,10 @@ class FeatureImportanceForestRegressor(BaseForestHT): null_dist_ : ArrayLike of shape (n_repeats,) The null distribution of the test statistic. + + References + ---------- + .. 
footbibliography:: """ def __init__( @@ -479,8 +496,7 @@ def __init__( ccp_alpha=0.0, max_samples=None, permute_per_tree=True, - sample_dataset_per_tree=False, - **estimator_kwargs, + sample_dataset_per_tree=True, ): super().__init__( estimator=estimator, @@ -501,7 +517,6 @@ def __init__( warm_start=warm_start, ccp_alpha=ccp_alpha, max_samples=max_samples, - **estimator_kwargs, ) self.permute_per_tree = permute_per_tree self.sample_dataset_per_tree = sample_dataset_per_tree @@ -526,9 +541,8 @@ def _get_estimator(self): warm_start=self.warm_start, ccp_alpha=self.ccp_alpha, max_samples=self.max_samples, - **self.estimator_kwargs, ) - elif isinstance(self.estimator, ForestRegressor): + elif not isinstance(self.estimator, (ForestRegressor, sklearnForestRegressor)): raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator)}") else: estimator_ = self.estimator @@ -585,6 +599,11 @@ def _statistic( X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) estimator.fit(X_dummy, unique_y) + if hasattr(self, "test_size_"): + test_size = self.test_size_ + else: + test_size = 0.2 # type: ignore + # now initialize posterior array as (n_trees, n_samples, n_outputs) posterior_arr = np.full((self.n_estimators, n_samples, estimator.n_outputs_), np.nan) # Fit each tree and compute posteriors with train test splits @@ -592,7 +611,7 @@ def _statistic( # sample train/test dataset for each tree indices_train, indices_test = train_test_split( np.arange(n_samples, dtype=int), - test_size=self.test_size_, + test_size=test_size, shuffle=True, random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), ) @@ -653,6 +672,8 @@ def _statistic( class FeatureImportanceForestClassifier(BaseForestHT): """Forest hypothesis testing with categorical `y` variable. + Implements the algorithm described in :footcite:`coleman2022scalable`. + The dataset is split into a training and testing dataset initially. 
Then there are two forests that are trained: one on the original dataset, and one on the permuted dataset. The dataset is either permuted once, or independently for @@ -820,6 +841,12 @@ class FeatureImportanceForestClassifier(BaseForestHT): - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0.0, 1.0]`. + permute_per_tree : bool, default=True + Whether to permute the covariate index per tree or per forest. + + sample_dataset_per_tree : bool, default=False + Whether to sample the dataset per tree or per forest. + Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -833,6 +860,10 @@ class FeatureImportanceForestClassifier(BaseForestHT): null_dist_ : ArrayLike of shape (n_repeats,) The null distribution of the test statistic. + + References + ---------- + .. footbibliography:: """ def __init__( @@ -856,8 +887,7 @@ def __init__( ccp_alpha=0.0, max_samples=None, permute_per_tree=True, - sample_dataset_per_tree=False, - **estimator_kwargs, + sample_dataset_per_tree=True, ): super().__init__( estimator=estimator, @@ -878,7 +908,6 @@ def __init__( warm_start=warm_start, ccp_alpha=ccp_alpha, max_samples=max_samples, - **estimator_kwargs, ) self.permute_per_tree = permute_per_tree self.sample_dataset_per_tree = sample_dataset_per_tree @@ -903,11 +932,14 @@ def _get_estimator(self): warm_start=self.warm_start, ccp_alpha=self.ccp_alpha, max_samples=self.max_samples, - **self.estimator_kwargs, ) - elif isinstance(self.estimator, ForestClassifier): + elif not isinstance(self.estimator, (ForestClassifier, sklearnForestClassifier)): raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator)}") else: + # self.estimator is an instance of a ForestEstimator, so we should verify that all + # the parameters are set correctly + # XXX: implement checks + estimator_ = self.estimator return estimator_ @@ -926,70 +958,89 @@ def _statistic( rng = np.random.default_rng(self.random_state) n_samples = 
X.shape[0] - if self.permute_per_tree and not self.sample_dataset_per_tree: - # first run a dummy fit on the samples to initialize the - # internal data structure of the forest - if not _is_fitted(estimator): - unique_y = np.unique(y) - X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) - estimator.fit(X_dummy, unique_y) - - # Fit each tree and compute posteriors with train test splits - n_samples_test = len(self.indices_test_) - - # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_outputs_)) - for idx in range(self.n_estimators): - tree: DecisionTreeClassifier = estimator.estimators_[idx] - train_tree( - tree, X[self.indices_train_, :], y[self.indices_train_, :], covariate_index - ) + if metric in POSTERIOR_FUNCTIONS: + predict_posteriors = True + else: + predict_posteriors = False - y_pred = tree.predict(X[self.indices_test_, :]).reshape(-1, tree.n_outputs_) + if hasattr(self, "test_size_"): + test_size = self.test_size_ + else: + test_size = 0.2 # type: ignore - # Fill test set posteriors & set rest NaN - posterior_arr[idx, ...] 
= y_pred # posterior + if not _is_fitted(estimator): + unique_y = np.unique(y) + X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) + if estimator.n_outputs_ > 1 and metric == "auc": + raise ValueError("AUC metric is not supported for multi-output classification") - y_true_final = y[self.indices_test_, :] - # Average all posteriors - posterior_final = np.nanmean(posterior_arr, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() - elif self.permute_per_tree and self.sample_dataset_per_tree: + if self.permute_per_tree: # first run a dummy fit on the samples to initialize the # internal data structure of the forest - if not _is_fitted(estimator): - unique_y = np.unique(y) - X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) - estimator.fit(X_dummy, unique_y) - - # now initialize posterior array as (n_trees, n_samples, n_outputs) - posterior_arr = np.full((self.n_estimators, n_samples, estimator.n_outputs_), np.nan) - # Fit each tree and compute posteriors with train test splits - for idx in range(self.n_estimators): - # sample train/test dataset for each tree - indices_train, indices_test = train_test_split( - np.arange(n_samples, dtype=int), - test_size=self.test_size_, - shuffle=True, - random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), - ) - tree = estimator.estimators_[idx] - train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) - - y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) - - posterior_arr[idx, indices_test, :] = y_pred # posterior - - # Average all posteriors - posterior_final = np.nanmean(posterior_arr, axis=0) - - # Find the row indices with NaN values in any column - nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] - - # Ignore all NaN values (samples not tested) - y_true_final = y[nonnan_indices, :] - posterior_final = posterior_final[nonnan_indices, :] - samples = nonnan_indices + if 
self.sample_dataset_per_tree: + # Fit each tree and compute posteriors with train test splits + n_samples_test = len(self.indices_test_) + else: + n_samples_test = n_samples + + if predict_posteriors: + posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_classes_)) + else: + # now initialize posterior array as (n_trees, n_samples_test, n_outputs) + posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_outputs_)) + + if self.sample_dataset_per_tree: + for idx in range(self.n_estimators): + tree: DecisionTreeClassifier = estimator.estimators_[idx] + train_tree( + tree, X[self.indices_train_, :], y[self.indices_train_, :], covariate_index + ) + + if predict_posteriors: + # XXX: currently assumes n_outputs_ == 1 + y_pred = tree.predict_proba(X[self.indices_test_, :]) + else: + y_pred = tree.predict(X[self.indices_test_, :]).reshape(-1, tree.n_outputs_) + + # Fill test set posteriors & set rest NaN + posterior_arr[idx, ...] = y_pred # posterior + + y_true_final = y[self.indices_test_, :] + # Average all posteriors + posterior_final = np.nanmean(posterior_arr, axis=0) + samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() + else: + # Fit each tree and compute posteriors with train test splits + for idx in range(self.n_estimators): + # sample train/test dataset for each tree + indices_train, indices_test = train_test_split( + np.arange(n_samples, dtype=int), + test_size=test_size, + shuffle=True, + random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), + ) + tree = estimator.estimators_[idx] + train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) + + if predict_posteriors: + # XXX: currently assumes n_outputs_ == 1 + y_pred = tree.predict_proba(X[indices_test, :]) + else: + y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) + + posterior_arr[idx, indices_test, :] = y_pred # posterior + + # Average all posteriors (n_samples, n_outputs) + posterior_final = 
np.nanmean(posterior_arr, axis=0) + + # Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + + # Ignore all NaN values (samples not tested) + y_true_final = y[nonnan_indices, :] + posterior_final = posterior_final[nonnan_indices, :] + samples = nonnan_indices else: X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] @@ -1006,13 +1057,28 @@ def _statistic( X_train[:, covariate_index] = X_train[index_arr, covariate_index] estimator.fit(X_train, y_train) - y_pred = estimator.predict(X_test) + + if predict_posteriors: + # XXX: currently assumes n_outputs_ == 1 + y_pred = estimator.predict_proba(X_test) + else: + y_pred = estimator.predict(X_test) # set variables to compute metric samples = self.indices_test_ y_true_final = y_test posterior_final = y_pred + if metric == "auc": + # at this point, posterior_final is the predicted posterior for only the positive class + # as more than one output is not supported. + if type_of_target(y_true_final) == "binary": + posterior_final = posterior_final[:, 1] + else: + raise RuntimeError( + f"AUC metric is not supported for {type_of_target(y_true_final)} targets." 
+ ) + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) diff --git a/sktree/stats/meson.build b/sktree/stats/meson.build index 0b45a0ada..fd81fea36 100644 --- a/sktree/stats/meson.build +++ b/sktree/stats/meson.build @@ -1,6 +1,5 @@ python_sources = [ '__init__.py', - '_might.py', 'forestht.py', 'utils.py', 'permutationforest.py', diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index 55b79833b..3808cf503 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -291,6 +291,12 @@ class PermutationForestRegressor(BasePermutationForest): is compared to the original random forest that was computed on the regular non-permuted data. + .. warning:: Permutation testing with forests is computationally expensive. + As a result, if you are testing for the importance of feature sets, consider + using :class:`sktree.stats.FeatureImportanceForestRegressor` or + :class:`sktree.stats.FeatureImportanceForestClassifier` instead, which is + much more computationally efficient. + .. note:: This does not allow testing on the posteriors. Parameters @@ -571,6 +577,12 @@ class PermutationForestClassifier(BasePermutationForest): is compared to the original random forest that was computed on the regular non-permuted data. + .. warning:: Permutation testing with forests is computationally expensive. + As a result, if you are testing for the importance of feature sets, consider + using :class:`sktree.stats.FeatureImportanceForestRegressor` or + :class:`sktree.stats.FeatureImportanceForestClassifier` instead, which is + much more computationally efficient. + .. note:: This does not allow testing on the posteriors. 
Parameters diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index d02f09fa8..1c161cbf2 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,8 +1,10 @@ import numpy as np import pytest +from flaky import flaky from scipy.special import expit from sklearn import datasets +from sktree import HonestForestClassifier from sktree._lib.sklearn.tree import DecisionTreeClassifier from sktree.stats import ( FeatureImportanceForestClassifier, @@ -12,10 +14,11 @@ ) from sktree.tree import ObliqueDecisionTreeClassifier -# load the iris dataset +# load the iris dataset (n_samples, 4) # and randomly permute it iris = datasets.load_iris() -rng = np.random.RandomState(1) +seed = 12345 +rng = np.random.default_rng(seed) # remove third class iris_X = iris.data[iris.target != 2] @@ -26,17 +29,7 @@ iris_y = iris_y[p] -seed = 12345 - - -def test_forestht_proper_attributes(): - """Forest HTs should have n_classes_ and n_outputs_ properly set. - - This requires the first dummy fit to always get all classes. 
- """ - pass - - +@flaky(max_runs=3) @pytest.mark.slowtest @pytest.mark.parametrize( "hypotester, model_kwargs, n_samples, n_repeats, test_size", @@ -44,26 +37,27 @@ def test_forestht_proper_attributes(): [ PermutationForestRegressor, { - "max_features": 1.0, + "max_features": "sqrt", "random_state": seed, - "n_estimators": 50, + "n_estimators": 75, "n_jobs": -1, }, - 550, + 300, 50, 0.1, ], [ FeatureImportanceForestRegressor, { - "max_features": 1.0, + "max_features": "sqrt", "random_state": seed, "n_estimators": 125, "permute_per_tree": True, + "sample_dataset_per_tree": True, "n_jobs": -1, }, - 600, - 200, + 300, + 500, 0.1, ], ], @@ -75,12 +69,10 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon """ - beta = 10.0 - sigma = 0.5 + beta = 15.0 + sigma = 0.05 metric = "mse" - rng = np.random.default_rng(seed) - # sample covariates X_15 = rng.uniform(0, 1, size=(n_samples, 5)) X_610 = np.zeros((n_samples, 5)) @@ -118,6 +110,7 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) assert pvalue > 0.05, f"pvalue: {pvalue}" +@flaky(max_runs=3) @pytest.mark.slowtest @pytest.mark.parametrize( "hypotester, model_kwargs, n_samples, n_repeats, test_size", @@ -127,10 +120,10 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) { "max_features": "sqrt", "random_state": seed, - "n_estimators": 75, + "n_estimators": 50, "n_jobs": -1, }, - 500, + 600, 50, 1.0 / 6, ], @@ -144,8 +137,8 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) "sample_dataset_per_tree": True, "n_jobs": -1, }, - 500, - 100, + 600, + 200, 1.0 / 6, ], ], @@ -157,13 +150,11 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, P(Y = 1 | X) = expit(beta * \\sum_{j=2}^5 X_j) """ - beta = 5.0 + beta = 10.0 metric = "mse" - n = 200 # Number of time steps - ar_coefficient = 0.0015 - - rng = 
np.random.default_rng(seed) + n = 100 # Number of time steps + ar_coefficient = 0.015 X = np.zeros((n_samples, n)) for idx in range(n_samples): @@ -210,7 +201,6 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, @pytest.mark.parametrize("criterion", ["gini", "entropy"]) -@pytest.mark.parametrize("max_features", [None, "sqrt"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) @pytest.mark.parametrize( "estimator", @@ -221,30 +211,47 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, ], ) @pytest.mark.parametrize("limit", [0.05, 0.1]) -def test_iris_pauc(criterion, max_features, honest_prior, estimator, limit): +def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): + max_features = "sqrt" + n_repeats = 200 + n_estimators = 50 + # Check consistency on dataset iris. clf = FeatureImportanceForestClassifier( criterion=criterion, random_state=0, max_features=max_features, - n_estimators=100, - honest_prior=honest_prior, - tree_estimator=estimator, + n_estimators=n_estimators, + estimator=HonestForestClassifier( + n_estimators=n_estimators, tree_estimator=estimator, honest_prior=honest_prior + ), + n_jobs=-1, + sample_dataset_per_tree=True, + permute_per_tree=True, ) - score = clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) - assert score >= 0.9, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) - # now add completely uninformative feature - X = np.hstack((iris_X, rng.standard_normal(size=(iris_X.shape[0], 1)))) + X = np.hstack((iris_X, rng.standard_normal(size=(iris_X.shape[0], 4)))) - # test for unimportant feature - test_size = 0.2 + # test for unimportant feature set + test_size = 0.1 clf.reset() - stat, pvalue = clf.test(X, iris_y, [X.shape[1] - 1], test_size=test_size, metric="auc") + stat, pvalue = clf.test( + X, + iris_y, + np.arange(iris_X.shape[0], X.shape[1], dtype=int).tolist(), + n_repeats=n_repeats, + test_size=test_size, + 
metric="auc", + ) print(pvalue) - # assert pvalue > 0.05, f"pvalue: {pvalue}" + assert pvalue > 0.05, f"pvalue: {pvalue}" - stat, pvalue = clf.test(X, iris_y, [2, 3], test_size=test_size, metric="auc") + # test for important features that are permuted + stat, pvalue = clf.test( + X, iris_y, [0, 1, 2, 3], n_repeats=n_repeats, test_size=test_size, metric="auc" + ) print(pvalue) - # assert pvalue < 0.05, f"pvalue: {pvalue}" - assert False + assert pvalue < 0.05, f"pvalue: {pvalue}" + + score = clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) + assert score >= 0.8, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) diff --git a/sktree/stats/tests/test_might.py b/sktree/stats/tests/test_might.py deleted file mode 100644 index ff613c571..000000000 --- a/sktree/stats/tests/test_might.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np -import pytest -from sklearn import datasets - -from sktree._lib.sklearn.tree import DecisionTreeClassifier -from sktree.stats import MIGHT -from sktree.tree import ObliqueDecisionTreeClassifier - -# load the iris dataset -# and randomly permute it -iris = datasets.load_iris() -rng = np.random.RandomState(1) - -# remove third class -iris_X = iris.data[iris.target != 2] -iris_y = iris.target[iris.target != 2] - -p = rng.permutation(iris_X.shape[0]) -iris_X = iris_X[p] -iris_y = iris_y[p] - - -@pytest.mark.parametrize("criterion", ["gini", "entropy"]) -@pytest.mark.parametrize("max_features", [None, 2]) -@pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore", "error"]) -@pytest.mark.parametrize( - "estimator", - [ - None, - DecisionTreeClassifier(), - ObliqueDecisionTreeClassifier(), - ], -) -@pytest.mark.parametrize("limit", [0.05, 0.1]) -def test_iris(criterion, max_features, honest_prior, estimator, limit): - # Check consistency on dataset iris. 
- clf = MIGHT( - criterion=criterion, - random_state=0, - max_features=max_features, - n_estimators=10, - honest_prior=honest_prior, - tree_estimator=estimator, - limit=limit, - ) - if honest_prior == "error": - with pytest.raises(ValueError, match="honest_prior error not a valid input."): - clf.statistic(iris_X, iris_y) - else: - score = clf.statistic(iris_X, iris_y, stat="AUC") - assert score >= 0.9, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index cf725f2a2..9b1c3debe 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -25,10 +25,15 @@ def _mutual_information(y_true, y_pred_proba): METRIC_FUNCTIONS = { "mse": mean_squared_error, "mae": mean_absolute_error, + "balanced_accuracy": balanced_accuracy_score, "auc": roc_auc_score, "mi": _mutual_information, - "balanced_accuracy": balanced_accuracy_score, } + +POSTERIOR_FUNCTIONS = ("mi", "auc") + +POSITIVE_METRICS = ("mi", "auc", "balanced_accuracy") + REGRESSOR_METRICS = ("mse", "mae") @@ -172,8 +177,6 @@ def _compute_null_distribution_coleman( An array of the metrics computed on the other half of the trees. 
""" rng = np.random.default_rng(seed) - # X_test, y_test = check_X_y(X_test, y_test, copy=True, ensure_2d=True, multi_output=True) - metric_func = METRIC_FUNCTIONS[metric] # sample two sets of equal number of trees from the combined forest these are the posteriors @@ -181,8 +184,12 @@ def _compute_null_distribution_coleman( n_samples_test = len(y_test) if len(all_y_pred) != 2 * n_samples_test: + print("y_pred_proba_perm: ", y_pred_proba_perm.shape) + print("y_pred_proba: ", y_pred_proba_normal.shape) + raise RuntimeError( - "The number of samples in `all_y_pred` is not equal to 2 * n_samples_test" + f"The number of samples in `all_y_pred` {len(all_y_pred)} " + f"is not equal to 2 * n_samples_test {2 * n_samples_test}" ) # create two stacked index arrays of y_test resulting in [1, ..., N, 1, ..., N] @@ -193,15 +200,6 @@ def _compute_null_distribution_coleman( # create index array of [1, ..., 2N] to slice into `all_y_pred` y_pred_ind_arr = np.arange((2 * n_samples_test), dtype=int) - # # get the indices of the samples that we have a posterior for, so each element - # # is an index into `y_test` - # all_samples_pred = np.concatenate((normal_samples, perm_samples), axis=0) - - # n_samples_final = len(all_samples_pred) - - # pre-allocate memory for the index array - # index_arr = np.arange(n_samples_final, dtype=int) - metric_star = np.zeros((n_repeats,)) metric_star_pi = np.zeros((n_repeats,)) for idx in range(n_repeats): diff --git a/test_requirements.txt b/test_requirements.txt index 0b025c336..abb85b9e2 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -2,4 +2,5 @@ pytest pytest-cov memory_profiler joblib -tqdm \ No newline at end of file +tqdm +flaky \ No newline at end of file From e9d07d869c696dd7a8a9e3153f8cba2d26526b7e Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 10:57:07 -0400 Subject: [PATCH 21/70] Fix meson build Signed-off-by: Adam Li --- sktree/stats/tests/meson.build | 1 - 1 file changed, 1 deletion(-) diff --git
a/sktree/stats/tests/meson.build b/sktree/stats/tests/meson.build index 22cc87bcb..2f18f5a65 100644 --- a/sktree/stats/tests/meson.build +++ b/sktree/stats/tests/meson.build @@ -1,6 +1,5 @@ python_sources = [ '__init__.py', - 'test_might.py', 'test_forestht.py' ] From 1dcd4f6736b5267b17809d02a96564d795978f9b Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 11:17:30 -0400 Subject: [PATCH 22/70] Fix unit-test Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 214 +++--------------- sktree/stats/tests/test_forestht.py | 2 +- 2 files changed, 32 insertions(+), 184 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 0fb5add90..9f3e15e28 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,30 +2,10 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: '/Users/adam2392/Documents/scikit-tree/sktree/stats/_might.py'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 9\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mseaborn\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msns\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mscipy\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecial\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m expit\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01msktree\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mstats\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 10\u001b[0m FeatureImportanceForestClassifier,\n\u001b[1;32m 11\u001b[0m FeatureImportanceForestRegressor,\n\u001b[1;32m 12\u001b[0m PermutationForestRegressor,\n\u001b[1;32m 13\u001b[0m )\n\u001b[1;32m 15\u001b[0m seed \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m12345\u001b[39m\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/__init__.py:39\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;66;03m# We are not importing the rest of scikit-tree during the build\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# process, as it may not be compiled yet\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 39\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _lib, tree, ensemble, experimental, stats\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_lib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msklearn\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mensemble\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_forest\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 41\u001b[0m RandomForestClassifier,\n\u001b[1;32m 42\u001b[0m RandomForestRegressor,\n\u001b[1;32m 43\u001b[0m ExtraTreesClassifier,\n\u001b[1;32m 44\u001b[0m ExtraTreesRegressor,\n\u001b[1;32m 45\u001b[0m )\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mneighbors\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m NearestNeighborsMetaEstimator\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_might\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MIGHT, MIGHT_MV\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mforestht\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FeatureImportanceForestClassifier, FeatureImportanceForestRegressor\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpermutationforest\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PermutationForestClassifier, PermutationForestRegressor\n", - "File \u001b[0;32m:1007\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:986\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:680\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", - "File \u001b[0;32m:846\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", - "File \u001b[0;32m:982\u001b[0m, in \u001b[0;36mget_code\u001b[0;34m(self, fullname)\u001b[0m\n", - "File \u001b[0;32m:1039\u001b[0m, in \u001b[0;36mget_data\u001b[0;34m(self, path)\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/Users/adam2392/Documents/scikit-tree/sktree/stats/_might.py'" - ] - } - ], + "outputs": [], "source": [ "from collections import defaultdict\n", "\n", @@ -46,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", "metadata": {}, "outputs": [], @@ -57,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -159,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -222,7 +202,12 @@ "\n", " # 
test for X_2 important\n", " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), covariate_index=[1], test_size=test_size, n_repeats=n_repeats, metric=metric\n", + " X.copy(),\n", + " y.copy(),\n", + " covariate_index=[1],\n", + " test_size=test_size,\n", + " n_repeats=n_repeats,\n", + " metric=metric,\n", " )\n", " pvalue_dict[\"X2\"] = pvalue\n", " print(\"X2: \", pvalue)\n", @@ -261,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 7, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true @@ -464,7 +449,7 @@ " for idx in range(5):\n", " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", "\n", - " elements_dict = correlated_logit_model(beta, new_seed)\n", + " elements_dict = linear_model_ancova(sigma_factor, new_seed)\n", " for key, value in elements_dict.items():\n", " pvalue_dict[key].append(value)\n", " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", @@ -474,13 +459,13 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "id": "9e60fac2-3b20-493e-886a-892d572a28c6", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -500,7 +485,7 @@ " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", " ax.legend()\n", "fig.suptitle(\n", - " \"Linear ANCOVA model with Coleman Forest (Permutation per tree and sample dataset per tree)\"\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", ")\n", "fig.tight_layout()" ] @@ -610,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", "metadata": { "scrolled": true @@ -621,138 +606,18 @@ "output_type": "stream", "text": [ "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", "X1: 1.0\n", "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - 
"X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", + "X2: 1.0\n", "X1: 0.004975124378109453\n", "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", + "X2: 1.0\n", "X1: 1.0\n", "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", + "X2: 1.0\n", "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 
0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", + "X2: 1.0\n", "X1: 0.004975124378109453\n", "X500: 0.004975124378109453\n" ] @@ -763,32 +628,24 @@ "rng = np.random.default_rng(seed)\n", "\n", "beta_space = np.hstack((np.linspace(0.01, 2.5, 8), np.linspace(5, 20, 7)))\n", - "for sigma_factor in j_space:\n", + "for beta in beta_space:\n", " for idx in range(5):\n", " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", "\n", - " elements_dict = correlated_logit_model(sigma_factor, new_seed)\n", + " elements_dict = correlated_logit_model(beta, new_seed)\n", " for key, value in elements_dict.items():\n", " pvalue_dict[key].append(value)\n", - " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + " pvalue_dict[\"sigma_factor\"].append(beta)\n", "\n", "df = pd.DataFrame(pvalue_dict)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "done\n" - ] - } - ], + "outputs": [], "source": [ "print(\"done\")" ] @@ -885,21 +742,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", @@ -910,7 +756,9 @@ " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (beta)\")\n", " ax.legend()\n", - "fig.suptitle(\"Correlated Logit model with Coleman Forest (permute per tree)\")\n", + "fig.suptitle(\n", + " \"Correlated Logit model with FeatureImportanceForestClassifier (permute per tree and sample dataset)\"\n", + ")\n", "fig.tight_layout()" ] }, diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 1c161cbf2..2e4f3d375 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -214,7 +214,7 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): max_features = "sqrt" n_repeats = 200 - n_estimators = 50 + n_estimators = 100 # Check consistency on dataset iris. 
clf = FeatureImportanceForestClassifier( From 37b1648526e115fbacd270522283f85ced8ef45a Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 11:41:49 -0400 Subject: [PATCH 23/70] Fix docs errors Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 68 ++++++++- doc/api.rst | 4 +- doc/conf.py | 7 + sktree/stats/__init__.py | 7 + sktree/stats/forestht.py | 84 ++-------- sktree/stats/permutationforest.py | 144 +++--------------- 6 files changed, 113 insertions(+), 201 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 9f3e15e28..7c812d6d8 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -619,7 +619,73 @@ "X500: 1.0\n", "X2: 1.0\n", "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n" + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + 
"X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n" ] } ], diff --git a/doc/api.rst b/doc/api.rst index cb722bdf0..ecaa78ecb 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -136,8 +136,8 @@ tree models. FeatureImportanceForestRegressor FeatureImportanceForestClassifier - PermutationImportanceForestRegressor - PermutationImportanceForestClassifier + PermutationForestClassifier + PermutationForestRegressor Experimental Functionality diff --git a/doc/conf.py b/doc/conf.py index 689ee5c94..48bc28c6e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -87,6 +87,7 @@ # TODO: figure out why these are raising an error? 
nitpick_ignore = [ ("py:mod", "sktree.tree"), + ("py:mod", "sktree.stats"), ] # The name of a reST role (builtin or Sphinx extension) to use as the default @@ -204,6 +205,7 @@ "matrix", "Ignored", "UnsupervisedSplitter", + "n_repeats", # from sklearn "such", "arrays", @@ -230,6 +232,11 @@ "joblib.parallel_backend", "length", "instances", + "decision_path", + "n_samples_final", + "predict", + "fit", + "apply", } # validation diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py index b01038036..1e1538c46 100644 --- a/sktree/stats/__init__.py +++ b/sktree/stats/__init__.py @@ -1,2 +1,9 @@ from .forestht import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor from .permutationforest import PermutationForestClassifier, PermutationForestRegressor + +__all__ = [ + "FeatureImportanceForestClassifier", + "FeatureImportanceForestRegressor", + "PermutationForestClassifier", + "PermutationForestRegressor", +] diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 8c329dbae..410a53974 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -100,11 +100,13 @@ def statistic( covariate_index : ArrayLike, optional of shape (n_covariates,) The index array of covariates to shuffle, by default None. metric : str, optional - The metric to compute, by default "auc". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. + The metric to compute, by default "mse". return_posteriors : bool, optional Whether or not to return the posteriors, by default False. + check_input : bool, optional + Whether or not to check the input, by default True. + **metric_kwargs : dict, optional + Additional keyword arguments to pass to the metric function. Returns ------- @@ -184,6 +186,8 @@ def test( Number of times to sample the null distribution, by default 1000. return_posteriors : bool, optional Whether or not to return the posteriors, by default False. 
+ **metric_kwargs : dict, optional + Additional keyword arguments to pass to the metric function. Returns ------- @@ -307,11 +311,6 @@ class FeatureImportanceForestRegressor(BaseForestHT): "gini" for the Gini impurity and "entropy" for the information gain. Note: this parameter is tree-specific. - splitter : {"best", "random"}, default="best" - The strategy used to choose the split at each node. Supported - strategies are "best" to choose the best split and "random" to choose - the best random split. - max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -388,11 +387,7 @@ class FeatureImportanceForestRegressor(BaseForestHT): Only available if bootstrap=True. n_jobs : int, default=None - The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, - :meth:`decision_path` and :meth:`apply` are all parallelized over the - trees. ``None`` means 1 unless in a `joblib.parallel_backend` - context. ``-1`` means using all processors. See :term:`Glossary - ` for more details. + The number of jobs to run in parallel. random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used @@ -409,32 +404,6 @@ class FeatureImportanceForestRegressor(BaseForestHT): and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `. - class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ - default=None - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. For - multi-output problems, a list of dicts can be provided in the same - order as the columns of y. - - Note that for multioutput (including multilabel) weights should be - defined for each class of every column in its own dict. 
For example, - for four-class multilabel classification weights should be - [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of - [{1:1}, {2:5}, {3:1}, {4:1}]. - - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` - - The "balanced_subsample" mode is the same as "balanced" except that - weights are computed based on the bootstrap sample for every tree - grown. - - For multi-output, the weights of each column of y will be multiplied. - - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. - ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than @@ -698,11 +667,6 @@ class FeatureImportanceForestClassifier(BaseForestHT): "gini" for the Gini impurity and "entropy" for the information gain. Note: this parameter is tree-specific. - splitter : {"best", "random"}, default="best" - The strategy used to choose the split at each node. Supported - strategies are "best" to choose the best split and "random" to choose - the best random split. - max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -779,11 +743,7 @@ class FeatureImportanceForestClassifier(BaseForestHT): Only available if bootstrap=True. n_jobs : int, default=None - The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, - :meth:`decision_path` and :meth:`apply` are all parallelized over the - trees. ``None`` means 1 unless in a `joblib.parallel_backend` - context. ``-1`` means using all processors. See :term:`Glossary - ` for more details. + The number of jobs to run in parallel. 
random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used @@ -800,32 +760,6 @@ class FeatureImportanceForestClassifier(BaseForestHT): and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `. - class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ - default=None - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. For - multi-output problems, a list of dicts can be provided in the same - order as the columns of y. - - Note that for multioutput (including multilabel) weights should be - defined for each class of every column in its own dict. For example, - for four-class multilabel classification weights should be - [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of - [{1:1}, {2:5}, {3:1}, {4:1}]. - - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` - - The "balanced_subsample" mode is the same as "balanced" except that - weights are computed based on the bootstrap sample for every tree - grown. - - For multi-output, the weights of each column of y will be multiplied. - - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. - ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. 
The subtree with the largest cost complexity that is smaller than diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index 3808cf503..2040f7003 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -31,7 +31,6 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, - **estimators_kwargs, ): self.estimator = estimator self.n_jobs = n_jobs @@ -51,7 +50,6 @@ def __init__( self.warm_start = warm_start self.ccp_alpha = ccp_alpha self.max_samples = max_samples - self.estimator_kwargs = estimators_kwargs def reset(self): class_attributes = dir(type(self)) @@ -141,10 +139,14 @@ def statistic( The index array of covariates to shuffle, by default None. metric : str, optional The metric to compute, by default "mse". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. return_posteriors : bool, optional Whether or not to return the posteriors, by default False. + check_input : bool, optional + Whether or not to check the input, by default True. + seed : int, optional + The random seed to use, by default None. + **metric_kwargs : dict, optional + Keyword arguments to pass to the metric function. Returns ------- @@ -221,13 +223,17 @@ def test( by default 0.2. n_repeats : int, optional Number of times to sample the null distribution, by default 1000. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + **metric_kwargs : dict, optional + Keyword arguments to pass to the metric function. Returns ------- observe_stat : float Observed test statistic. pvalue : float - p-value of the test. + Pvalue of the test. """ X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) if y.ndim != 2: @@ -292,10 +298,10 @@ class PermutationForestRegressor(BasePermutationForest): non-permuted data. .. warning:: Permutation testing with forests is computationally expensive. 
- As a result, if you are testing for the importance of feature sets, consider - using :class:`sktree.stats.FeatureImportanceForestRegressor` or - :class:`sktree.stats.FeatureImportanceForestClassifier` instead, which is - much more computationally efficient. + As a result, if you are testing for the importance of feature sets, consider + using `sktree.FeatureImportanceForestRegressor` or + `sktree.FeatureImportanceForestClassifier` instead, which is + much more computationally efficient. .. note:: This does not allow testing on the posteriors. @@ -319,11 +325,6 @@ class PermutationForestRegressor(BasePermutationForest): the L1 loss using the median of each terminal node, and "poisson" which uses reduction in Poisson deviance to find splits. - splitter : {"best", "random"}, default="best" - The strategy used to choose the split at each node. Supported - strategies are "best" to choose the best split and "random" to choose - the best random split. - max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -400,11 +401,7 @@ class PermutationForestRegressor(BasePermutationForest): Only available if bootstrap=True. n_jobs : int, default=None - The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, - :meth:`decision_path` and :meth:`apply` are all parallelized over the - trees. ``None`` means 1 unless in a `joblib.parallel_backend` - context. ``-1`` means using all processors. See :term:`Glossary - ` for more details. + The number of jobs to run in parallel. random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used @@ -421,32 +418,6 @@ class PermutationForestRegressor(BasePermutationForest): and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `. 
- class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ - default=None - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. For - multi-output problems, a list of dicts can be provided in the same - order as the columns of y. - - Note that for multioutput (including multilabel) weights should be - defined for each class of every column in its own dict. For example, - for four-class multilabel classification weights should be - [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of - [{1:1}, {2:5}, {3:1}, {4:1}]. - - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` - - The "balanced_subsample" mode is the same as "balanced" except that - weights are computed based on the bootstrap sample for every tree - grown. - - For multi-output, the weights of each column of y will be multiplied. - - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. - ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than @@ -462,22 +433,6 @@ class PermutationForestRegressor(BasePermutationForest): - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0.0, 1.0]`. - honest_prior : {"ignore", "uniform", "empirical"}, default="empirical" - Method for dealing with empty leaves during evaluation of a test - sample. If "ignore", the tree is ignored. If "uniform", the prior tree - posterior is 1/(number of classes). If "empirical", the prior tree - posterior is the relative class frequency in the voting subsample. - If all trees are ignored, the empirical estimate is returned. 
- - honest_fraction : float, default=0.5 - Fraction of training samples used for estimates in the trees. The - remaining samples will be used to learn the tree structure. A larger - fraction creates shallower trees with lower variance estimates. - - tree_estimator : object, default=None - Type of decision tree classifier to use. By default `None`, which - defaults to :class:`sklearn.tree.DecisionTreeClassifier`. - Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -517,7 +472,6 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, - **estimators_kwargs, ): super().__init__( estimator=estimator, @@ -538,7 +492,6 @@ def __init__( warm_start=warm_start, ccp_alpha=ccp_alpha, max_samples=max_samples, - **estimators_kwargs, ) def _get_estimator(self): @@ -561,7 +514,6 @@ def _get_estimator(self): warm_start=self.warm_start, ccp_alpha=self.ccp_alpha, max_samples=self.max_samples, - **self.estimator_kwargs, ) elif not isinstance(self.estimator_, ForestRegressor): raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") @@ -578,10 +530,10 @@ class PermutationForestClassifier(BasePermutationForest): non-permuted data. .. warning:: Permutation testing with forests is computationally expensive. - As a result, if you are testing for the importance of feature sets, consider - using :class:`sktree.stats.FeatureImportanceForestRegressor` or - :class:`sktree.stats.FeatureImportanceForestClassifier` instead, which is - much more computationally efficient. + As a result, if you are testing for the importance of feature sets, consider + using `sktree.FeatureImportanceForestRegressor` or + `sktree.FeatureImportanceForestClassifier` instead, which is + much more computationally efficient. .. note:: This does not allow testing on the posteriors. @@ -598,11 +550,6 @@ class PermutationForestClassifier(BasePermutationForest): The function to measure the quality of a split. 
Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain. - splitter : {"best", "random"}, default="best" - The strategy used to choose the split at each node. Supported - strategies are "best" to choose the best split and "random" to choose - the best random split. - max_depth : int, default=None The maximum depth of the tree. If None, then nodes are expanded until all leaves are pure or until all leaves contain less than @@ -679,11 +626,7 @@ class PermutationForestClassifier(BasePermutationForest): Only available if bootstrap=True. n_jobs : int, default=None - The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`, - :meth:`decision_path` and :meth:`apply` are all parallelized over the - trees. ``None`` means 1 unless in a `joblib.parallel_backend` - context. ``-1`` means using all processors. See :term:`Glossary - ` for more details. + The number of jobs to run in parallel. random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used @@ -700,32 +643,6 @@ class PermutationForestClassifier(BasePermutationForest): and add more estimators to the ensemble, otherwise, just fit a whole new forest. See :term:`the Glossary `. - class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, \ - default=None - Weights associated with classes in the form ``{class_label: weight}``. - If not given, all classes are supposed to have weight one. For - multi-output problems, a list of dicts can be provided in the same - order as the columns of y. - - Note that for multioutput (including multilabel) weights should be - defined for each class of every column in its own dict. For example, - for four-class multilabel classification weights should be - [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of - [{1:1}, {2:5}, {3:1}, {4:1}]. 
- - The "balanced" mode uses the values of y to automatically adjust - weights inversely proportional to class frequencies in the input data - as ``n_samples / (n_classes * np.bincount(y))`` - - The "balanced_subsample" mode is the same as "balanced" except that - weights are computed based on the bootstrap sample for every tree - grown. - - For multi-output, the weights of each column of y will be multiplied. - - Note that these weights will be multiplied with sample_weight (passed - through the fit method) if sample_weight is specified. - ccp_alpha : non-negative float, default=0.0 Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than @@ -741,22 +658,6 @@ class PermutationForestClassifier(BasePermutationForest): - If float, then draw `max_samples * X.shape[0]` samples. Thus, `max_samples` should be in the interval `(0.0, 1.0]`. - honest_prior : {"ignore", "uniform", "empirical"}, default="empirical" - Method for dealing with empty leaves during evaluation of a test - sample. If "ignore", the tree is ignored. If "uniform", the prior tree - posterior is 1/(number of classes). If "empirical", the prior tree - posterior is the relative class frequency in the voting subsample. - If all trees are ignored, the empirical estimate is returned. - - honest_fraction : float, default=0.5 - Fraction of training samples used for estimates in the trees. The - remaining samples will be used to learn the tree structure. A larger - fraction creates shallower trees with lower variance estimates. - - tree_estimator : object, default=None - Type of decision tree classifier to use. By default `None`, which - defaults to :class:`sklearn.tree.DecisionTreeClassifier`. 
- Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -796,7 +697,6 @@ def __init__( warm_start=False, ccp_alpha=0.0, max_samples=None, - **estimators_kwargs, ): super().__init__( estimator=estimator, @@ -817,7 +717,6 @@ def __init__( warm_start=warm_start, ccp_alpha=ccp_alpha, max_samples=max_samples, - **estimators_kwargs, ) def _get_estimator(self): @@ -840,7 +739,6 @@ def _get_estimator(self): warm_start=self.warm_start, ccp_alpha=self.ccp_alpha, max_samples=self.max_samples, - **self.estimator_kwargs, ) elif not isinstance(self.estimator_, ForestClassifier): raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator_)}") From a28a8426f6006486d5915f008f3c0635b4e2c238 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 11:51:22 -0400 Subject: [PATCH 24/70] Fix unit-test Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 31 ++++++++++++++++++- sktree/stats/tests/test_forestht.py | 6 +++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 7c812d6d8..8a76cdb35 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -685,7 +685,36 @@ "X500: 1.0\n", "X2: 0.004975124378109453\n", "X1: 1.0\n", - "X500: 1.0\n" + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 
0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n" ] } ], diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 2e4f3d375..2ef6958f9 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -200,6 +200,7 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, assert pvalue > 0.05, f"pvalue: {pvalue}" +@flaky(max_runs=3) @pytest.mark.parametrize("criterion", ["gini", "entropy"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) @pytest.mark.parametrize( @@ -223,7 +224,10 @@ def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): max_features=max_features, n_estimators=n_estimators, estimator=HonestForestClassifier( - n_estimators=n_estimators, tree_estimator=estimator, honest_prior=honest_prior + n_estimators=n_estimators, + tree_estimator=estimator, + honest_prior=honest_prior, + random_state=0, ), n_jobs=-1, sample_dataset_per_tree=True, From b88e12e36de3eccf1749343886b2a3b2df7db260 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 12:46:37 -0400 Subject: [PATCH 25/70] Clean up API Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 216 +---------- sktree/stats/forestht.py | 360 +----------------- sktree/stats/permutationforest.py | 360 +----------------- sktree/stats/tests/test_forestht.py | 46 ++- 4 files changed, 73 insertions(+), 909 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 8a76cdb35..ce008e796 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, "outputs": [], @@ -15,6 +15,7 @@ "import 
seaborn as sns\n", "from scipy.special import expit\n", "\n", + "from sktree import RandomForestClassifier, RandomForestRegressor\n", "from sktree.stats import (\n", " FeatureImportanceForestClassifier,\n", " FeatureImportanceForestRegressor,\n", @@ -37,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -84,14 +85,12 @@ "\n", " # initialize hypothesis tester\n", " est = FeatureImportanceForestRegressor(\n", - " max_features=1.0,\n", + " RandomForestRegressor(\n", + " max_features=1.0, random_state=seed, n_estimators=n_estimators, n_jobs=-1\n", + " ),\n", " random_state=seed,\n", - " n_estimators=n_estimators,\n", - " n_jobs=-1,\n", " permute_per_tree=permute_per_tree,\n", " sample_dataset_per_tree=sample_dataset_per_tree,\n", - " # bootstrap=True,\n", - " # max_samples=subsample_size\n", " )\n", " pvalue_dict = {}\n", "\n", @@ -139,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 18, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -190,14 +189,15 @@ "\n", " # initialize hypothesis tester\n", " est = FeatureImportanceForestClassifier(\n", - " max_features=1.0,\n", + " RandomForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " ),\n", " random_state=seed,\n", - " n_estimators=n_estimators,\n", - " n_jobs=-1,\n", " permute_per_tree=permute_per_tree,\n", " sample_dataset_per_tree=sample_dataset_per_tree,\n", - " # bootstrap=True,\n", - " # max_samples=subsample_size\n", " )\n", "\n", " # test for X_2 important\n", @@ -609,112 +609,8 @@ "X1: 1.0\n", "X500: 1.0\n", "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 
0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - 
"X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 1.0\n" + "X500: 1.0\n" ] } ], @@ -747,90 +643,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
X2X1X500sigma_factor
01.0000000.0049751.0000000.005
10.0049750.0049751.0000000.005
21.0000000.0049751.0000000.005
30.0049751.0000000.0049750.005
40.0049751.0000000.0049750.005
\n", - "
" - ], - "text/plain": [ - " X2 X1 X500 sigma_factor\n", - "0 1.000000 0.004975 1.000000 0.005\n", - "1 0.004975 0.004975 1.000000 0.005\n", - "2 1.000000 0.004975 1.000000 0.005\n", - "3 0.004975 1.000000 0.004975 0.005\n", - "4 0.004975 1.000000 0.004975 0.005" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(df.head())" ] diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 410a53974..49ec36c1c 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -30,47 +30,21 @@ class BaseForestHT(MetaEstimatorMixin): def __init__( self, estimator=None, - n_estimators=100, - criterion="squared_error", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, random_state=None, verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, permute_per_tree=True, sample_dataset_per_tree=True, ): self.estimator = estimator - self.n_jobs = n_jobs - self.n_estimators = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.bootstrap = bootstrap - self.oob_score = oob_score self.random_state = random_state self.verbose = verbose - self.warm_start = warm_start - self.ccp_alpha = ccp_alpha - self.max_samples = max_samples self.permute_per_tree = permute_per_tree self.sample_dataset_per_tree = sample_dataset_per_tree + @property + def n_estimators(self): + return self.estimator_.n_estimators + def reset(self): class_attributes = dir(type(self)) instance_attributes = dir(self) @@ -303,92 +277,6 @@ class 
FeatureImportanceForestRegressor(BaseForestHT): Type of forest estimator to use. By default `None`, which defaults to :class:`sklearn.ensemble.RandomForestRegressor`. - n_estimators : int, default=100 - The number of trees in the forest. - - criterion : {"gini", "entropy"}, default="gini" - The function to measure the quality of a split. Supported criteria are - "gini" for the Gini impurity and "entropy" for the information gain. - Note: this parameter is tree-specific. - - max_depth : int, default=None - The maximum depth of the tree. If None, then nodes are expanded until - all leaves are pure or until all leaves contain less than - min_samples_split samples. - - min_samples_split : int or float, default=2 - The minimum number of samples required to split an internal node: - - - If int, then consider `min_samples_split` as the minimum number. - - If float, then `min_samples_split` is a fraction and - `ceil(min_samples_split * n_samples)` are the minimum - number of samples for each split. - - min_samples_leaf : int or float, default=1 - The minimum number of samples required to be at a leaf node. - A split point at any depth will only be considered if it leaves at - least ``min_samples_leaf`` training samples in each of the left and - right branches. This may have the effect of smoothing the model, - especially in regression. - - - If int, then consider `min_samples_leaf` as the minimum number. - - If float, then `min_samples_leaf` is a fraction and - `ceil(min_samples_leaf * n_samples)` are the minimum - number of samples for each node. - - min_weight_fraction_leaf : float, default=0.0 - The minimum weighted fraction of the sum total of weights (of all - the input samples) required to be at a leaf node. Samples have - equal weight when sample_weight is not provided. 
- - max_features : {"sqrt", "log2", None}, int or float, default="sqrt" - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a fraction and - `round(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=sqrt(n_features)`. - - If "sqrt", then `max_features=sqrt(n_features)`. - - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - - max_leaf_nodes : int, default=None - Grow trees with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. - - min_impurity_decrease : float, default=0.0 - A node will be split if this split induces a decrease of the impurity - greater than or equal to this value. - - The weighted impurity decrease equation is the following:: - - N_t / N * (impurity - N_t_R / N_t * right_impurity - - N_t_L / N_t * left_impurity) - - where ``N`` is the total number of samples, ``N_t`` is the number of - samples at the current node, ``N_t_L`` is the number of samples in the - left child, and ``N_t_R`` is the number of samples in the right child. - - ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, - if ``sample_weight`` is passed. - - bootstrap : bool, default=True - Whether bootstrap samples are used when building trees. If False, the - whole dataset is used to build each tree. - - oob_score : bool, default=False - Whether to use out-of-bag samples to estimate the generalization score. - Only available if bootstrap=True. - - n_jobs : int, default=None - The number of jobs to run in parallel. 
- random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the @@ -399,26 +287,6 @@ class FeatureImportanceForestRegressor(BaseForestHT): verbose : int, default=0 Controls the verbosity when fitting and predicting. - warm_start : bool, default=False - When set to ``True``, reuse the solution of the previous call to fit - and add more estimators to the ensemble, otherwise, just fit a whole - new forest. See :term:`the Glossary `. - - ccp_alpha : non-negative float, default=0.0 - Complexity parameter used for Minimal Cost-Complexity Pruning. The - subtree with the largest cost complexity that is smaller than - ``ccp_alpha`` will be chosen. By default, no pruning is performed. See - :ref:`minimal_cost_complexity_pruning` for details. - - max_samples : int or float, default=None - If bootstrap is True, the number of samples to draw from X - to train each base tree estimator. - - - If None (default), then draw `X.shape[0]` samples. - - If int, then draw `max_samples` samples. - - If float, then draw `max_samples * X.shape[0]` samples. Thus, - `max_samples` should be in the interval `(0.0, 1.0]`. - permute_per_tree : bool, default=True Whether to permute the covariate index per tree or per forest. @@ -427,6 +295,9 @@ class FeatureImportanceForestRegressor(BaseForestHT): Attributes ---------- + estimator_ : BaseForest + The estimator used to compute the test statistic. + samples_ : ArrayLike of shape (n_samples,) The indices of the samples used in the final test. 
@@ -447,70 +318,22 @@ class FeatureImportanceForestRegressor(BaseForestHT): def __init__( self, estimator=None, - n_estimators=100, - criterion="squared_error", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, random_state=None, verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, permute_per_tree=True, sample_dataset_per_tree=True, ): super().__init__( estimator=estimator, - n_estimators=n_estimators, - criterion=criterion, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, random_state=random_state, verbose=verbose, - warm_start=warm_start, - ccp_alpha=ccp_alpha, - max_samples=max_samples, + permute_per_tree=permute_per_tree, + sample_dataset_per_tree=sample_dataset_per_tree, ) - self.permute_per_tree = permute_per_tree - self.sample_dataset_per_tree = sample_dataset_per_tree def _get_estimator(self): if self.estimator is None: - estimator_ = RandomForestRegressor( - n_estimators=self.n_estimators, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - max_features=self.max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_impurity_decrease=self.min_impurity_decrease, - bootstrap=self.bootstrap, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - random_state=self.random_state, - verbose=self.verbose, - warm_start=self.warm_start, - ccp_alpha=self.ccp_alpha, - max_samples=self.max_samples, - ) + estimator_ = RandomForestRegressor() elif not 
isinstance(self.estimator, (ForestRegressor, sklearnForestRegressor)): raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator)}") else: @@ -659,92 +482,6 @@ class FeatureImportanceForestClassifier(BaseForestHT): Type of forest estimator to use. By default `None`, which defaults to :class:`sklearn.ensemble.RandomForestRegressor`. - n_estimators : int, default=100 - The number of trees in the forest. - - criterion : {"gini", "entropy"}, default="gini" - The function to measure the quality of a split. Supported criteria are - "gini" for the Gini impurity and "entropy" for the information gain. - Note: this parameter is tree-specific. - - max_depth : int, default=None - The maximum depth of the tree. If None, then nodes are expanded until - all leaves are pure or until all leaves contain less than - min_samples_split samples. - - min_samples_split : int or float, default=2 - The minimum number of samples required to split an internal node: - - - If int, then consider `min_samples_split` as the minimum number. - - If float, then `min_samples_split` is a fraction and - `ceil(min_samples_split * n_samples)` are the minimum - number of samples for each split. - - min_samples_leaf : int or float, default=1 - The minimum number of samples required to be at a leaf node. - A split point at any depth will only be considered if it leaves at - least ``min_samples_leaf`` training samples in each of the left and - right branches. This may have the effect of smoothing the model, - especially in regression. - - - If int, then consider `min_samples_leaf` as the minimum number. - - If float, then `min_samples_leaf` is a fraction and - `ceil(min_samples_leaf * n_samples)` are the minimum - number of samples for each node. - - min_weight_fraction_leaf : float, default=0.0 - The minimum weighted fraction of the sum total of weights (of all - the input samples) required to be at a leaf node. Samples have - equal weight when sample_weight is not provided. 
- - max_features : {"sqrt", "log2", None}, int or float, default="sqrt" - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a fraction and - `round(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=sqrt(n_features)`. - - If "sqrt", then `max_features=sqrt(n_features)`. - - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - - max_leaf_nodes : int, default=None - Grow trees with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. - - min_impurity_decrease : float, default=0.0 - A node will be split if this split induces a decrease of the impurity - greater than or equal to this value. - - The weighted impurity decrease equation is the following:: - - N_t / N * (impurity - N_t_R / N_t * right_impurity - - N_t_L / N_t * left_impurity) - - where ``N`` is the total number of samples, ``N_t`` is the number of - samples at the current node, ``N_t_L`` is the number of samples in the - left child, and ``N_t_R`` is the number of samples in the right child. - - ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, - if ``sample_weight`` is passed. - - bootstrap : bool, default=True - Whether bootstrap samples are used when building trees. If False, the - whole dataset is used to build each tree. - - oob_score : bool, default=False - Whether to use out-of-bag samples to estimate the generalization score. - Only available if bootstrap=True. - - n_jobs : int, default=None - The number of jobs to run in parallel. 
- random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the @@ -755,26 +492,6 @@ class FeatureImportanceForestClassifier(BaseForestHT): verbose : int, default=0 Controls the verbosity when fitting and predicting. - warm_start : bool, default=False - When set to ``True``, reuse the solution of the previous call to fit - and add more estimators to the ensemble, otherwise, just fit a whole - new forest. See :term:`the Glossary `. - - ccp_alpha : non-negative float, default=0.0 - Complexity parameter used for Minimal Cost-Complexity Pruning. The - subtree with the largest cost complexity that is smaller than - ``ccp_alpha`` will be chosen. By default, no pruning is performed. See - :ref:`minimal_cost_complexity_pruning` for details. - - max_samples : int or float, default=None - If bootstrap is True, the number of samples to draw from X - to train each base tree estimator. - - - If None (default), then draw `X.shape[0]` samples. - - If int, then draw `max_samples` samples. - - If float, then draw `max_samples * X.shape[0]` samples. Thus, - `max_samples` should be in the interval `(0.0, 1.0]`. - permute_per_tree : bool, default=True Whether to permute the covariate index per tree or per forest. @@ -783,6 +500,9 @@ class FeatureImportanceForestClassifier(BaseForestHT): Attributes ---------- + estimator_ : BaseForest + The estimator used to compute the test statistic. + samples_ : ArrayLike of shape (n_samples,) The indices of the samples used in the final test. 
@@ -803,70 +523,22 @@ class FeatureImportanceForestClassifier(BaseForestHT): def __init__( self, estimator=None, - n_estimators=100, - criterion="gini", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, random_state=None, verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, permute_per_tree=True, sample_dataset_per_tree=True, ): super().__init__( estimator=estimator, - n_estimators=n_estimators, - criterion=criterion, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, random_state=random_state, verbose=verbose, - warm_start=warm_start, - ccp_alpha=ccp_alpha, - max_samples=max_samples, + permute_per_tree=permute_per_tree, + sample_dataset_per_tree=sample_dataset_per_tree, ) - self.permute_per_tree = permute_per_tree - self.sample_dataset_per_tree = sample_dataset_per_tree def _get_estimator(self): if self.estimator is None: - estimator_ = RandomForestClassifier( - n_estimators=self.n_estimators, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - max_features=self.max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_impurity_decrease=self.min_impurity_decrease, - bootstrap=self.bootstrap, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - random_state=self.random_state, - verbose=self.verbose, - warm_start=self.warm_start, - ccp_alpha=self.ccp_alpha, - max_samples=self.max_samples, - ) + estimator_ = RandomForestClassifier() elif not isinstance(self.estimator, 
(ForestClassifier, sklearnForestClassifier)): raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator)}") else: diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index 2040f7003..b51a182bb 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -2,6 +2,8 @@ from numpy.typing import ArrayLike from sklearn.base import MetaEstimatorMixin, clone, is_classifier from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor +from sklearn.ensemble._forest import ForestClassifier as sklearnForestClassifier +from sklearn.ensemble._forest import ForestRegressor as sklearnForestRegressor from sklearn.model_selection import train_test_split from sklearn.utils.validation import check_X_y @@ -14,42 +16,12 @@ class BasePermutationForest(MetaEstimatorMixin): def __init__( self, estimator=None, - n_estimators=100, - criterion="squared_error", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features=1.0, - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, random_state=None, verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, ): self.estimator = estimator - self.n_jobs = n_jobs - self.n_estimators = n_estimators - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes - self.min_impurity_decrease = min_impurity_decrease - self.bootstrap = bootstrap - self.oob_score = oob_score self.random_state = random_state self.verbose = verbose - self.warm_start = warm_start - self.ccp_alpha = ccp_alpha - self.max_samples = max_samples def reset(self): class_attributes = dir(type(self)) @@ -309,99 +281,7 @@ class 
PermutationForestRegressor(BasePermutationForest): ---------- estimator : object, default=None Type of forest estimator to use. By default `None`, which defaults to - :class:`sklearn.ensemble.RandomForestRegressor`. - - n_estimators : int, default=100 - The number of trees in the forest. - - criterion : {"squared_error", "friedman_mse", "absolute_error", \ - "poisson"}, default="squared_error" - The function to measure the quality of a split. Supported criteria - are "squared_error" for the mean squared error, which is equal to - variance reduction as feature selection criterion and minimizes the L2 - loss using the mean of each terminal node, "friedman_mse", which uses - mean squared error with Friedman's improvement score for potential - splits, "absolute_error" for the mean absolute error, which minimizes - the L1 loss using the median of each terminal node, and "poisson" which - uses reduction in Poisson deviance to find splits. - - max_depth : int, default=None - The maximum depth of the tree. If None, then nodes are expanded until - all leaves are pure or until all leaves contain less than - min_samples_split samples. - - min_samples_split : int or float, default=2 - The minimum number of samples required to split an internal node: - - - If int, then consider `min_samples_split` as the minimum number. - - If float, then `min_samples_split` is a fraction and - `ceil(min_samples_split * n_samples)` are the minimum - number of samples for each split. - - min_samples_leaf : int or float, default=1 - The minimum number of samples required to be at a leaf node. - A split point at any depth will only be considered if it leaves at - least ``min_samples_leaf`` training samples in each of the left and - right branches. This may have the effect of smoothing the model, - especially in regression. - - - If int, then consider `min_samples_leaf` as the minimum number. 
- - If float, then `min_samples_leaf` is a fraction and - `ceil(min_samples_leaf * n_samples)` are the minimum - number of samples for each node. - - min_weight_fraction_leaf : float, default=0.0 - The minimum weighted fraction of the sum total of weights (of all - the input samples) required to be at a leaf node. Samples have - equal weight when sample_weight is not provided. - - max_features : {"sqrt", "log2", None}, int or float, default="sqrt" - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a fraction and - `round(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=sqrt(n_features)`. - - If "sqrt", then `max_features=sqrt(n_features)`. - - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - - max_leaf_nodes : int, default=None - Grow trees with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. - - min_impurity_decrease : float, default=0.0 - A node will be split if this split induces a decrease of the impurity - greater than or equal to this value. - - The weighted impurity decrease equation is the following:: - - N_t / N * (impurity - N_t_R / N_t * right_impurity - - N_t_L / N_t * left_impurity) - - where ``N`` is the total number of samples, ``N_t`` is the number of - samples at the current node, ``N_t_L`` is the number of samples in the - left child, and ``N_t_R`` is the number of samples in the right child. - - ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, - if ``sample_weight`` is passed. 
- - bootstrap : bool, default=True - Whether bootstrap samples are used when building trees. If False, the - whole dataset is used to build each tree. - - oob_score : bool, default=False - Whether to use out-of-bag samples to estimate the generalization score. - Only available if bootstrap=True. - - n_jobs : int, default=None - The number of jobs to run in parallel. + :class:`sklearn.ensemble.RandomForestRegressor` with default parameters. random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used @@ -413,26 +293,6 @@ class PermutationForestRegressor(BasePermutationForest): verbose : int, default=0 Controls the verbosity when fitting and predicting. - warm_start : bool, default=False - When set to ``True``, reuse the solution of the previous call to fit - and add more estimators to the ensemble, otherwise, just fit a whole - new forest. See :term:`the Glossary `. - - ccp_alpha : non-negative float, default=0.0 - Complexity parameter used for Minimal Cost-Complexity Pruning. The - subtree with the largest cost complexity that is smaller than - ``ccp_alpha`` will be chosen. By default, no pruning is performed. See - :ref:`minimal_cost_complexity_pruning` for details. - - max_samples : int or float, default=None - If bootstrap is True, the number of samples to draw from X - to train each base tree estimator. - - - If None (default), then draw `X.shape[0]` samples. - - If int, then draw `max_samples` samples. - - If float, then draw `max_samples * X.shape[0]` samples. Thus, - `max_samples` should be in the interval `(0.0, 1.0]`. 
- Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -455,68 +315,22 @@ class PermutationForestRegressor(BasePermutationForest): def __init__( self, estimator=None, - n_estimators=100, - criterion="squared_error", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features=1.0, - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, random_state=None, verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, ): super().__init__( estimator=estimator, - n_estimators=n_estimators, - criterion=criterion, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, random_state=random_state, verbose=verbose, - warm_start=warm_start, - ccp_alpha=ccp_alpha, - max_samples=max_samples, ) def _get_estimator(self): if not hasattr(self, "estimator_") and self.estimator is None: - estimator_ = RandomForestRegressor( - n_estimators=self.n_estimators, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - max_features=self.max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_impurity_decrease=self.min_impurity_decrease, - bootstrap=self.bootstrap, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - random_state=self.random_state, - verbose=self.verbose, - warm_start=self.warm_start, - ccp_alpha=self.ccp_alpha, - max_samples=self.max_samples, - ) - elif not isinstance(self.estimator_, ForestRegressor): - raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator_)}") + estimator_ = RandomForestRegressor() + elif not 
isinstance(self.estimator, (ForestRegressor, sklearnForestRegressor)): + raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator)}") + else: + estimator_ = self.estimator return estimator_ @@ -543,88 +357,6 @@ class PermutationForestClassifier(BasePermutationForest): Type of forest estimator to use. By default `None`, which defaults to :class:`sklearn.ensemble.RandomForestClassifier`. - n_estimators : int, default=100 - The number of trees in the forest. - - criterion : {"gini", "entropy"}, default="gini" - The function to measure the quality of a split. Supported criteria are - "gini" for the Gini impurity and "entropy" for the information gain. - - max_depth : int, default=None - The maximum depth of the tree. If None, then nodes are expanded until - all leaves are pure or until all leaves contain less than - min_samples_split samples. - - min_samples_split : int or float, default=2 - The minimum number of samples required to split an internal node: - - - If int, then consider `min_samples_split` as the minimum number. - - If float, then `min_samples_split` is a fraction and - `ceil(min_samples_split * n_samples)` are the minimum - number of samples for each split. - - min_samples_leaf : int or float, default=1 - The minimum number of samples required to be at a leaf node. - A split point at any depth will only be considered if it leaves at - least ``min_samples_leaf`` training samples in each of the left and - right branches. This may have the effect of smoothing the model, - especially in regression. - - - If int, then consider `min_samples_leaf` as the minimum number. - - If float, then `min_samples_leaf` is a fraction and - `ceil(min_samples_leaf * n_samples)` are the minimum - number of samples for each node. - - min_weight_fraction_leaf : float, default=0.0 - The minimum weighted fraction of the sum total of weights (of all - the input samples) required to be at a leaf node. 
Samples have - equal weight when sample_weight is not provided. - - max_features : {"sqrt", "log2", None}, int or float, default="sqrt" - The number of features to consider when looking for the best split: - - - If int, then consider `max_features` features at each split. - - If float, then `max_features` is a fraction and - `round(max_features * n_features)` features are considered at each - split. - - If "auto", then `max_features=sqrt(n_features)`. - - If "sqrt", then `max_features=sqrt(n_features)`. - - If "log2", then `max_features=log2(n_features)`. - - If None, then `max_features=n_features`. - - Note: the search for a split does not stop until at least one - valid partition of the node samples is found, even if it requires to - effectively inspect more than ``max_features`` features. - - max_leaf_nodes : int, default=None - Grow trees with ``max_leaf_nodes`` in best-first fashion. - Best nodes are defined as relative reduction in impurity. - If None then unlimited number of leaf nodes. - - min_impurity_decrease : float, default=0.0 - A node will be split if this split induces a decrease of the impurity - greater than or equal to this value. - - The weighted impurity decrease equation is the following:: - - N_t / N * (impurity - N_t_R / N_t * right_impurity - - N_t_L / N_t * left_impurity) - - where ``N`` is the total number of samples, ``N_t`` is the number of - samples at the current node, ``N_t_L`` is the number of samples in the - left child, and ``N_t_R`` is the number of samples in the right child. - - ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum, - if ``sample_weight`` is passed. - - bootstrap : bool, default=True - Whether bootstrap samples are used when building trees. If False, the - whole dataset is used to build each tree. - - oob_score : bool, default=False - Whether to use out-of-bag samples to estimate the generalization score. - Only available if bootstrap=True. 
- n_jobs : int, default=None The number of jobs to run in parallel. @@ -638,26 +370,6 @@ class PermutationForestClassifier(BasePermutationForest): verbose : int, default=0 Controls the verbosity when fitting and predicting. - warm_start : bool, default=False - When set to ``True``, reuse the solution of the previous call to fit - and add more estimators to the ensemble, otherwise, just fit a whole - new forest. See :term:`the Glossary `. - - ccp_alpha : non-negative float, default=0.0 - Complexity parameter used for Minimal Cost-Complexity Pruning. The - subtree with the largest cost complexity that is smaller than - ``ccp_alpha`` will be chosen. By default, no pruning is performed. See - :ref:`minimal_cost_complexity_pruning` for details. - - max_samples : int or float, default=None - If bootstrap is True, the number of samples to draw from X - to train each base tree estimator. - - - If None (default), then draw `X.shape[0]` samples. - - If int, then draw `max_samples` samples. - - If float, then draw `max_samples * X.shape[0]` samples. Thus, - `max_samples` should be in the interval `(0.0, 1.0]`. 
- Attributes ---------- samples_ : ArrayLike of shape (n_samples,) @@ -680,66 +392,20 @@ class PermutationForestClassifier(BasePermutationForest): def __init__( self, estimator=None, - n_estimators=100, - criterion="gini", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_features="sqrt", - max_leaf_nodes=None, - min_impurity_decrease=0.0, - bootstrap=False, - oob_score=False, - n_jobs=None, random_state=None, verbose=0, - warm_start=False, - ccp_alpha=0.0, - max_samples=None, ): super().__init__( estimator=estimator, - n_estimators=n_estimators, - criterion=criterion, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - min_impurity_decrease=min_impurity_decrease, - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, random_state=random_state, verbose=verbose, - warm_start=warm_start, - ccp_alpha=ccp_alpha, - max_samples=max_samples, ) def _get_estimator(self): if not hasattr(self, "estimator_") and self.estimator is None: - estimator_ = RandomForestClassifier( - n_estimators=self.n_estimators, - criterion=self.criterion, - max_depth=self.max_depth, - min_samples_split=self.min_samples_split, - min_samples_leaf=self.min_samples_leaf, - min_weight_fraction_leaf=self.min_weight_fraction_leaf, - max_features=self.max_features, - max_leaf_nodes=self.max_leaf_nodes, - min_impurity_decrease=self.min_impurity_decrease, - bootstrap=self.bootstrap, - oob_score=self.oob_score, - n_jobs=self.n_jobs, - random_state=self.random_state, - verbose=self.verbose, - warm_start=self.warm_start, - ccp_alpha=self.ccp_alpha, - max_samples=self.max_samples, - ) - elif not isinstance(self.estimator_, ForestClassifier): - raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator_)}") + estimator_ = RandomForestClassifier() + elif not 
isinstance(self.estimator, (ForestClassifier, sklearnForestClassifier)): + raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator)}") + else: + estimator_ = self.estimator return estimator_ diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 2ef6958f9..3366dc1ea 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -4,7 +4,7 @@ from scipy.special import expit from sklearn import datasets -from sktree import HonestForestClassifier +from sktree import HonestForestClassifier, RandomForestClassifier, RandomForestRegressor from sktree._lib.sklearn.tree import DecisionTreeClassifier from sktree.stats import ( FeatureImportanceForestClassifier, @@ -37,10 +37,13 @@ [ PermutationForestRegressor, { - "max_features": "sqrt", + "estimator": RandomForestRegressor( + max_features="sqrt", + random_state=seed, + n_estimators=75, + n_jobs=-1, + ), "random_state": seed, - "n_estimators": 75, - "n_jobs": -1, }, 300, 50, @@ -49,12 +52,15 @@ [ FeatureImportanceForestRegressor, { - "max_features": "sqrt", + "estimator": RandomForestRegressor( + max_features="sqrt", + random_state=seed, + n_estimators=125, + n_jobs=-1, + ), "random_state": seed, - "n_estimators": 125, "permute_per_tree": True, "sample_dataset_per_tree": True, - "n_jobs": -1, }, 300, 500, @@ -118,10 +124,13 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) [ PermutationForestClassifier, { - "max_features": "sqrt", + "estimator": RandomForestClassifier( + max_features="sqrt", + random_state=seed, + n_estimators=50, + n_jobs=-1, + ), "random_state": seed, - "n_estimators": 50, - "n_jobs": -1, }, 600, 50, @@ -130,12 +139,15 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) [ FeatureImportanceForestClassifier, { - "max_features": "sqrt", + "estimator": RandomForestClassifier( + max_features="sqrt", + random_state=seed, + n_estimators=125, + n_jobs=-1, 
+ ), "random_state": seed, - "n_estimators": 125, "permute_per_tree": True, "sample_dataset_per_tree": True, - "n_jobs": -1, }, 600, 200, @@ -219,17 +231,15 @@ def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): # Check consistency on dataset iris. clf = FeatureImportanceForestClassifier( - criterion=criterion, - random_state=0, - max_features=max_features, - n_estimators=n_estimators, estimator=HonestForestClassifier( + criterion=criterion, n_estimators=n_estimators, + max_features=max_features, tree_estimator=estimator, honest_prior=honest_prior, random_state=0, + n_jobs=-1, ), - n_jobs=-1, sample_dataset_per_tree=True, permute_per_tree=True, ) From 579003fd6b6610a1ac2f39e4ab3ed6c4b381e00b Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 15:49:35 -0400 Subject: [PATCH 26/70] Working clean code Signed-off-by: Adam Li --- .spin/cmds.py | 7 +- README.md | 2 +- .../test_permutation_forest.ipynb | 353 +++++++++++++++++- doc/conf.py | 1 + sktree/conftest.py | 19 + sktree/stats/forestht.py | 352 +++++++++-------- sktree/stats/permutationforest.py | 72 +++- sktree/stats/tests/test_forestht.py | 38 +- 8 files changed, 628 insertions(+), 216 deletions(-) diff --git a/.spin/cmds.py b/.spin/cmds.py index 1a5fc67e4..30380208d 100644 --- a/.spin/cmds.py +++ b/.spin/cmds.py @@ -33,15 +33,10 @@ def docs(ctx, build_dir, clean=False, noplot=False): util.run(["pip", "install", "-q", "-r", "doc_requirements.txt"]) ctx.invoke(meson.docs) - # os.environ["SPHINXOPTS"] = "-W" - # os.environ["PYTHONPATH"] = f'{site_path}{os.sep}:{os.environ.get("PYTHONPATH", "")}' - # if noplot: - # util.run(["make", "-C", "docs", "clean", "html-noplot"], replace=True) - # else: - # util.run(["make", "-C", "docs", "clean", "html"], replace=True) @click.command() +@click.option("--runslow", help="Run slow tests.") @click.pass_context def coverage(ctx): """📊 Generate coverage report""" diff --git a/README.md b/README.md index 36e39fd02..4da75fef1 100644 --- a/README.md +++ 
b/README.md @@ -20,7 +20,7 @@ See here for the documentation for our dev version: https://docs.neurodata.io/sc Why oblique trees and why trees beyond those in scikit-learn? ============================================================= -In 2001, Leo Breiman proposed two types of Random Forests. One was known as ``Forest-RI``, which is the axis-aligned traditional random forest. One was known as ``Forest-RC``, which is the random oblique linear combinations random forest. This leveraged random combinations of features to perform splits. [MORF](1) builds upon ``Forest-RC`` by proposing additional functions to combine features. Other modern tree variants such as Canonical Correlation Forests (CCF), or unsupervised random forests are also important at solving real-world problems using robust decision tree models. +In 2001, Leo Breiman proposed two types of Random Forests. One was known as ``Forest-RI``, which is the axis-aligned traditional random forest. One was known as ``Forest-RC``, which is the random oblique linear combinations random forest. This leveraged random combinations of features to perform splits. [MORF](1) builds upon ``Forest-RC`` by proposing additional functions to combine features. Other modern tree variants such as Canonical Correlation Forests (CCF), Extended Isolation Forests, Quantile Forests, or unsupervised random forests are also important at solving real-world problems using robust decision tree models. 
Installation ============ diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index ce008e796..e8859626e 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -595,7 +595,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, + "id": "36c53ff3-984d-4428-87c1-3421098e0081", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.005 0.285625 0.56625 0.846875 1.1275 1.408125 1.68875 1.969375\n", + " 2.25 ]\n" + ] + } + ], + "source": [ + "print(j_space)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", "metadata": { "scrolled": true @@ -610,7 +629,226 @@ "X500: 1.0\n", "X2: 1.0\n", "X1: 1.0\n", - "X500: 1.0\n" + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + 
"X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 
0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 
0.004975124378109453\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n" ] } ], @@ -633,30 +871,129 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n" + ] + } + ], "source": [ "print(\"done\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X2X1X500sigma_factor
01.01.0000001.0000000.01
11.01.0000001.0000000.01
21.00.0049750.0049750.01
31.01.0000001.0000000.01
41.00.0049750.0049750.01
\n", + "
" + ], + "text/plain": [ + " X2 X1 X500 sigma_factor\n", + "0 1.0 1.000000 1.000000 0.01\n", + "1 1.0 1.000000 1.000000 0.01\n", + "2 1.0 0.004975 0.004975 0.01\n", + "3 1.0 1.000000 1.000000 0.01\n", + "4 1.0 0.004975 0.004975 0.01" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display(df.head())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", diff --git a/doc/conf.py b/doc/conf.py index 48bc28c6e..fbab7cddd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -206,6 +206,7 @@ "Ignored", "UnsupervisedSplitter", "n_repeats", + "n_samples_test_used", # from sklearn "such", "arrays", diff --git a/sktree/conftest.py b/sktree/conftest.py index c226c3c60..7b69b7098 100644 --- a/sktree/conftest.py +++ b/sktree/conftest.py @@ -1,3 +1,22 @@ +# import pytest + + +# def pytest_addoption(parser): +# parser.addoption( +# "--runslow", action="store_true", default=False, help="run slow tests" +# ) + + def pytest_configure(config): """Set up pytest markers.""" config.addinivalue_line("markers", "slowtest: mark test as slow") + + +# def pytest_collection_modifyitems(config, items): +# if config.getoption("--runslow"): +# # --runslow given in cli: do not skip slow tests +# return +# skip_slow = pytest.mark.skip(reason="need --runslow option to run") +# for item in items: +# if "slow" in item.keywords: +# item.add_marker(skip_slow) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 49ec36c1c..9393ec91f 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -32,12 +32,14 @@ def __init__( estimator=None, random_state=None, verbose=0, + test_size=0.2, permute_per_tree=True, sample_dataset_per_tree=True, ): self.estimator = estimator self.random_state = random_state self.verbose = verbose + self.test_size = test_size self.permute_per_tree = permute_per_tree self.sample_dataset_per_tree = sample_dataset_per_tree @@ -53,6 +55,58 @@ def reset(self): if attr_name.endswith("_") and attr_name not in class_attributes: delattr(self, attr_name) + def _get_estimators_indices(self): + indices = np.arange(self._n_samples_, dtype=int) + + # Get drawn indices along both sample and feature axes + if self.permute_per_tree and 
self.sample_dataset_per_tree: + for tree in self.estimator_.estimators_: + seed = tree.random_state + + # Operations accessing random_state must be performed identically + # to those in `_parallel_build_trees()` + indices_train, indices_test = train_test_split( + indices, test_size=self.test_size, shuffle=True, random_state=seed + ) + + yield indices_train, indices_test + else: + indices_train, indices_test = train_test_split( + indices, test_size=self.test_size, shuffle=True, random_state=self.random_state + ) + for tree in self.estimator_.estimators_: + yield indices_train, indices_test + + @property + def train_test_samples_(self): + """ + The subset of drawn samples for each base estimator. + + Returns a dynamically generated list of indices identifying + the samples used for fitting each member of the ensemble, i.e., + the in-bag samples. + + Note: the list is re-created at each call to the property in order + to reduce the object memory footprint by not storing the sampling + data. Thus fetching the property may be slower than expected. 
+ """ + return [ + (indices_train, indices_test) + for indices_train, indices_test in self._get_estimators_indices() + ] + + def _statistic( + self, + estimator: BaseForest, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + return_posteriors: bool = False, + **metric_kwargs, + ): + raise NotImplementedError("Subclasses should implement this!") + def statistic( self, X: ArrayLike, @@ -101,6 +155,9 @@ def statistic( if y.ndim != 2: y = y.reshape(-1, 1) + if self.sample_dataset_per_tree and not self.permute_per_tree: + raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True") + if covariate_index is None: self.estimator_ = self._get_estimator() estimator = self.estimator_ @@ -108,8 +165,48 @@ def statistic( self.permuted_estimator_ = clone(self.estimator_) estimator = self.permuted_estimator_ + # Infer type of target y + if not hasattr(self, "_type_of_target"): + self._type_of_target = type_of_target(y) + + # XXX: this can be improved as an extra fit can be avoided, by just doing error-checking + # and then setting the internal meta data structures + # first run a dummy fit on the samples to initialize the + # internal data structure of the forest + if not _is_fitted(estimator) and is_classifier(estimator): + _unique_y = [] + for axis in range(y.shape[1]): + _unique_y.append(np.unique(y[:, axis])) + unique_y = np.hstack(_unique_y) + if unique_y.shape[1] == 1: + unique_y = unique_y.ravel() + X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) + estimator.fit(X_dummy, unique_y) + elif not _is_fitted(estimator): + if y.shape[1] == 1: + estimator.fit(X[:2], y[:2].ravel()) + else: + estimator.fit(X[:2], y[:2]) + + # permute per tree + n_samples = X.shape[0] + self._n_samples_ = n_samples + if self.sample_dataset_per_tree: + self.n_samples_test_ = n_samples + else: + # not permute per tree + test_size_ = int(self.test_size * n_samples) + + # Fit each tree and compute posteriors with train test splits + 
self.n_samples_test_ = test_size_ + if not is_classifier(self.estimator_) and metric not in REGRESSOR_METRICS: - raise RuntimeError(f'Metric must be either "mse" or "mae", got {metric}') + raise RuntimeError( + f'Metric must be either "mse" or "mae" if using Regression, got {metric}' + ) + + if estimator.n_outputs_ > 1 and metric == "auc": + raise ValueError("AUC metric is not supported for multi-output") return self._statistic( estimator, @@ -174,11 +271,11 @@ def test( if y.ndim != 2: y = y.reshape(-1, 1) - indices = np.arange(X.shape[0]) - self.test_size_ = int(test_size * X.shape[0]) - indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) - self.indices_train_ = indices_train - self.indices_test_ = indices_test + # indices = np.arange(X.shape[0]) + # self.test_size_ = int(test_size * X.shape[0]) + # indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) + # self.indices_train_ = indices_train + # self.indices_test_ = indices_test if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data @@ -220,11 +317,11 @@ def test( seed=self.random_state, ) else: - if self.permute_per_tree: - y_test = y + if not self.sample_dataset_per_tree: + _, indices_test = self.train_test_samples_[0] + y_test = y[indices_test, :] else: - y_test = y[self.indices_test_, :] - print(y.shape, observe_posteriors.shape, permute_posteriors.shape) + y_test = y metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, y_pred_proba_normal=observe_posteriors, @@ -298,8 +395,18 @@ class FeatureImportanceForestRegressor(BaseForestHT): estimator_ : BaseForest The estimator used to compute the test statistic. - samples_ : ArrayLike of shape (n_samples,) - The indices of the samples used in the final test. + n_samples_test_ : int + The number of samples used in the final test set. 
+ + indices_train_ : ArrayLike of shape (n_samples_train,) + The indices of the samples used in the training set. + + indices_test_ : ArrayLike of shape (n_samples_test,) + The indices of the samples used in the testing set. + + samples_ : ArrayLike of shape (n_samples_final,) + The indices of the samples used in the final test set that would slice + the original ``(X, y)`` input. y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. @@ -320,6 +427,7 @@ def __init__( estimator=None, random_state=None, verbose=0, + test_size=0.2, permute_per_tree=True, sample_dataset_per_tree=True, ): @@ -327,6 +435,7 @@ def __init__( estimator=estimator, random_state=random_state, verbose=verbose, + test_size=test_size, permute_per_tree=permute_per_tree, sample_dataset_per_tree=sample_dataset_per_tree, ) @@ -342,7 +451,7 @@ def _get_estimator(self): def _statistic( self, - estimator: BaseForest, + estimator: ForestRegressor, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, @@ -353,79 +462,40 @@ def _statistic( """Helper function to compute the test statistic.""" metric_func = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) - n_samples = X.shape[0] - - if self.permute_per_tree and not self.sample_dataset_per_tree: - # first run a dummy fit on the samples to initialize the - # internal data structure of the forest - if not _is_fitted(estimator): - unique_y = np.unique(y) - X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) - estimator.fit(X_dummy, unique_y) - - # Fit each tree and compute posteriors with train test splits - n_samples_test = len(self.indices_test_) + if self.permute_per_tree: # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_outputs_)) - for idx in range(self.n_estimators): + posterior_arr = np.zeros( + (self.n_estimators, self.n_samples_test_, estimator.n_outputs_) + ) + for 
idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): tree: DecisionTreeRegressor = estimator.estimators_[idx] - train_tree( - tree, X[self.indices_train_, :], y[self.indices_train_, :], covariate_index - ) - - y_pred = tree.predict(X[self.indices_test_, :]).reshape(-1, tree.n_outputs_) - - # Fill test set posteriors & set rest NaN - posterior_arr[idx, ...] = y_pred # posterior - - y_true_final = y[self.indices_test_, :] - # Average all posteriors - posterior_final = np.nanmean(posterior_arr, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() - elif self.permute_per_tree and self.sample_dataset_per_tree: - # first run a dummy fit on the samples to initialize the - # internal data structure of the forest - if not _is_fitted(estimator): - unique_y = np.unique(y) - X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) - estimator.fit(X_dummy, unique_y) - - if hasattr(self, "test_size_"): - test_size = self.test_size_ - else: - test_size = 0.2 # type: ignore - - # now initialize posterior array as (n_trees, n_samples, n_outputs) - posterior_arr = np.full((self.n_estimators, n_samples, estimator.n_outputs_), np.nan) - # Fit each tree and compute posteriors with train test splits - for idx in range(self.n_estimators): - # sample train/test dataset for each tree - indices_train, indices_test = train_test_split( - np.arange(n_samples, dtype=int), - test_size=test_size, - shuffle=True, - random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), - ) - tree = estimator.estimators_[idx] train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) + + # Fill test set posteriors & set rest NaN + print(posterior_arr.shape, y_pred.shape) posterior_arr[idx, indices_test, :] = y_pred # posterior - # Average all posteriors + y_true_final = y[indices_test, :] + + # Average all posteriors (n_samples_test, n_outputs) posterior_final 
= np.nanmean(posterior_arr, axis=0) # Find the row indices with NaN values in any column nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + samples = nonnan_indices # Ignore all NaN values (samples not tested) y_true_final = y[nonnan_indices, :] posterior_final = posterior_final[nonnan_indices, :] - samples = nonnan_indices else: - X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] - y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] + # fitting a forest will only get one unique train/test split + indices_train, indices_test = self.train_test_samples_[0] + + X_train, X_test = X[indices_train, :], X[indices_test, :] + y_train, y_test = y[indices_train, :], y[indices_test, :] if covariate_index is not None: # perform permutation of covariates @@ -442,7 +512,7 @@ def _statistic( y_pred = estimator.predict(X_test) # set variables to compute metric - samples = self.indices_test_ + samples = indices_test y_true_final = y_test posterior_final = y_pred @@ -503,8 +573,18 @@ class FeatureImportanceForestClassifier(BaseForestHT): estimator_ : BaseForest The estimator used to compute the test statistic. - samples_ : ArrayLike of shape (n_samples,) - The indices of the samples used in the final test. + n_samples_test_ : int + The number of samples used in the final test set. + + indices_train_ : ArrayLike of shape (n_samples_train,) + The indices of the samples used in the training set. + + indices_test_ : ArrayLike of shape (n_samples_test,) + The indices of the samples used in the testing set. + + samples_ : ArrayLike of shape (n_samples_final,) + The indices of the samples used in the final test set that would slice + the original ``(X, y)`` input. y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. 
@@ -525,6 +605,7 @@ def __init__( estimator=None, random_state=None, verbose=0, + test_size=0.2, permute_per_tree=True, sample_dataset_per_tree=True, ): @@ -532,6 +613,7 @@ def __init__( estimator=estimator, random_state=random_state, verbose=verbose, + test_size=test_size, permute_per_tree=permute_per_tree, sample_dataset_per_tree=sample_dataset_per_tree, ) @@ -542,16 +624,13 @@ def _get_estimator(self): elif not isinstance(self.estimator, (ForestClassifier, sklearnForestClassifier)): raise RuntimeError(f"Estimator must be a ForestClassifier, got {type(self.estimator)}") else: - # self.estimator is an instance of a ForestEstimator, so we should verify that all - # the parameters are set correctly - # XXX: implement checks - + # self.estimator is an instance of a ForestEstimator estimator_ = self.estimator return estimator_ def _statistic( self, - estimator: BaseForest, + estimator: ForestClassifier, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, @@ -562,94 +641,53 @@ def _statistic( """Helper function to compute the test statistic.""" metric_func = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) - n_samples = X.shape[0] if metric in POSTERIOR_FUNCTIONS: predict_posteriors = True else: predict_posteriors = False - if hasattr(self, "test_size_"): - test_size = self.test_size_ - else: - test_size = 0.2 # type: ignore - - if not _is_fitted(estimator): - unique_y = np.unique(y) - X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) - estimator.fit(X_dummy, unique_y) - if estimator.n_outputs_ > 1 and metric == "auc": - raise ValueError("AUC metric is not supported for multi-output classification") - if self.permute_per_tree: - # first run a dummy fit on the samples to initialize the - # internal data structure of the forest - if self.sample_dataset_per_tree: - # Fit each tree and compute posteriors with train test splits - n_samples_test = len(self.indices_test_) - else: - n_samples_test = n_samples - if predict_posteriors: - 
posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_classes_)) + posterior_arr = np.zeros( + (self.n_estimators, self.n_samples_test_, estimator.n_classes_) + ) else: # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - posterior_arr = np.zeros((self.n_estimators, n_samples_test, estimator.n_outputs_)) - - if self.sample_dataset_per_tree: - for idx in range(self.n_estimators): - tree: DecisionTreeClassifier = estimator.estimators_[idx] - train_tree( - tree, X[self.indices_train_, :], y[self.indices_train_, :], covariate_index - ) - - if predict_posteriors: - # XXX: currently assumes n_outputs_ == 1 - y_pred = tree.predict_proba(X[self.indices_test_, :]) - else: - y_pred = tree.predict(X[self.indices_test_, :]).reshape(-1, tree.n_outputs_) - - # Fill test set posteriors & set rest NaN - posterior_arr[idx, ...] = y_pred # posterior - - y_true_final = y[self.indices_test_, :] - # Average all posteriors - posterior_final = np.nanmean(posterior_arr, axis=0) - samples = np.argwhere(~np.isnan(posterior_final).any(axis=1)).squeeze() - else: - # Fit each tree and compute posteriors with train test splits - for idx in range(self.n_estimators): - # sample train/test dataset for each tree - indices_train, indices_test = train_test_split( - np.arange(n_samples, dtype=int), - test_size=test_size, - shuffle=True, - random_state=rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32), - ) - tree = estimator.estimators_[idx] - train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) - - if predict_posteriors: - # XXX: currently assumes n_outputs_ == 1 - y_pred = tree.predict_proba(X[indices_test, :]) - else: - y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) - - posterior_arr[idx, indices_test, :] = y_pred # posterior - - # Average all posteriors (n_samples, n_outputs) - posterior_final = np.nanmean(posterior_arr, axis=0) - - # Find the row indices with NaN values in any column - nonnan_indices = 
np.where(~np.isnan(posterior_final).any(axis=1))[0] - - # Ignore all NaN values (samples not tested) - y_true_final = y[nonnan_indices, :] - posterior_final = posterior_final[nonnan_indices, :] - samples = nonnan_indices + posterior_arr = np.zeros( + (self.n_estimators, self.n_samples_test_, estimator.n_outputs_) + ) + + for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): + tree: DecisionTreeClassifier = estimator.estimators_[idx] + train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) + + if predict_posteriors: + # XXX: currently assumes n_outputs_ == 1 + y_pred = tree.predict_proba(X[indices_test, :]) + else: + y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) + + # Fill test set posteriors & set rest NaN + print(posterior_arr.shape, y_pred.shape) + posterior_arr[idx, indices_test, :] = y_pred # posterior + + # Average all posteriors (n_samples_test, n_outputs) + posterior_final = np.nanmean(posterior_arr, axis=0) + + # Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + samples = nonnan_indices + + # Ignore all NaN values (samples not tested) + y_true_final = y[nonnan_indices, :] + posterior_final = posterior_final[nonnan_indices, :] else: - X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] - y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] + # fitting a forest will only get one unique train/test split + indices_train, indices_test = self.train_test_samples_[0] + + X_train, X_test = X[indices_train, :], X[indices_test, :] + y_train, y_test = y[indices_train, :], y[indices_test, :] if covariate_index is not None: # perform permutation of covariates @@ -671,18 +709,18 @@ def _statistic( y_pred = estimator.predict(X_test) # set variables to compute metric - samples = self.indices_test_ + samples = indices_test y_true_final = y_test posterior_final = y_pred if metric == "auc": # 
at this point, posterior_final is the predicted posterior for only the positive class # as more than one output is not supported. - if type_of_target(y_true_final) == "binary": + if self._type_of_target == "binary": posterior_final = posterior_final[:, 1] else: raise RuntimeError( - f"AUC metric is not supported for {type_of_target(y_true_final)} targets." + f"AUC metric is not supported for {self._type_of_target} targets." ) stat = metric_func(y_true_final, posterior_final, **metric_kwargs) diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index b51a182bb..026b89eca 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -16,10 +16,12 @@ class BasePermutationForest(MetaEstimatorMixin): def __init__( self, estimator=None, + test_size=0.2, random_state=None, verbose=0, ): self.estimator = estimator + self.test_size = test_size self.random_state = random_state self.verbose = verbose @@ -34,6 +36,27 @@ def reset(self): def _get_estimator(self): pass + @property + def train_test_samples_(self): + """ + The subset of drawn samples for each base estimator. + + Returns a dynamically generated list of indices identifying + the samples used for fitting each member of the ensemble, i.e., + the in-bag samples. + + Note: the list is re-created at each call to the property in order + to reduce the object memory footprint by not storing the sampling + data. Thus fetching the property may be slower than expected. 
+ """ + indices = np.arange(self._n_samples_, dtype=int) + + # Get drawn indices along both sample and feature axes + indices_train, indices_test = train_test_split( + indices, test_size=self.test_size, shuffle=True, random_state=self.random_state + ) + return indices_train, indices_test + def _statistic( self, estimator: BaseForest, @@ -51,25 +74,27 @@ def _statistic( rng = np.random.default_rng(self.random_state) else: rng = np.random.default_rng(seed) - n_samples = X.shape[0] - indices = np.arange(n_samples, dtype=int) - + indices_train, indices_test = self.train_test_samples_ if covariate_index is not None: + n_samples = X.shape[0] + indices = np.arange(n_samples, dtype=int) # perform permutation of covariates - index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=False) + index_arr = rng.choice(indices, size=(n_samples, 1), replace=False, shuffle=False) X = X.copy() X[:, covariate_index] = X[index_arr, covariate_index] - X_train, X_test = X[self.indices_train_, :], X[self.indices_test_, :] - y_train, y_test = y[self.indices_train_, :], y[self.indices_test_, :] - - estimator.fit(X_train, y_train.ravel()) + X_train, X_test = X[indices_train, :], X[indices_test, :] + y_train, y_test = y[indices_train, :], y[indices_test, :] + if y_train.shape[1] == 1: + y_train = y_train.ravel() + y_test = y_test.ravel() + estimator.fit(X_train, y_train) # Either get the predicted value, or the posterior probabilities y_pred = estimator.predict(X_test) # set variables to compute metric - samples = self.indices_test_ + samples = indices_test y_true_final = y_test posterior_final = y_pred @@ -79,7 +104,7 @@ def _statistic( # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) # arrays of y and predicted posterior self.samples_ = samples - self.y_true_final_ = y_true_final + self.y_true_ = y_true_final self.posterior_final_ = posterior_final self.stat_ = stat @@ -139,6 +164,7 @@ def statistic( if y.ndim != 2: y = y.reshape(-1, 1) + 
self._n_samples_ = X.shape[0] self.estimator_ = self._get_estimator() if is_classifier(self.estimator_): @@ -173,7 +199,6 @@ def test( y: ArrayLike, covariate_index: ArrayLike, metric: str = "mse", - test_size: float = 0.2, n_repeats: int = 1000, return_posteriors: bool = False, **metric_kwargs, @@ -210,14 +235,11 @@ def test( X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) if y.ndim != 2: y = y.reshape(-1, 1) - - indices = np.arange(X.shape[0]) + self._n_samples_ = X.shape[0] # train/test split # XXX: could add stratifying by y when y is classification - indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) - self.indices_train_ = indices_train - self.indices_test_ = indices_test + indices_train, indices_test = self.train_test_samples_ if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data @@ -238,10 +260,10 @@ def test( # compute null distribution of the test statistic # WARNING: this could take a long time, since it fits a new forest null_dist = _compute_null_distribution_perm( - X_train=X[self.indices_train_, :], - y_train=y[self.indices_train_, :], - X_test=X[self.indices_test_, :], - y_test=y[self.indices_test_, :], + X_train=X[indices_train, :], + y_train=y[indices_train, :], + X_test=X[indices_test, :], + y_test=y[indices_test, :], covariate_index=covariate_index, est=self.estimator_, metric=metric, @@ -283,6 +305,9 @@ class PermutationForestRegressor(BasePermutationForest): Type of forest estimator to use. By default `None`, which defaults to :class:`sklearn.ensemble.RandomForestRegressor` with default parameters. + test_size : float, default=0.2 + The proportion of samples to leave out for each tree to compute metric on. 
+ random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the @@ -315,11 +340,13 @@ class PermutationForestRegressor(BasePermutationForest): def __init__( self, estimator=None, + test_size=0.2, random_state=None, verbose=0, ): super().__init__( estimator=estimator, + test_size=test_size, random_state=random_state, verbose=verbose, ) @@ -357,6 +384,9 @@ class PermutationForestClassifier(BasePermutationForest): Type of forest estimator to use. By default `None`, which defaults to :class:`sklearn.ensemble.RandomForestClassifier`. + test_size : float, default=0.2 + The proportion of samples to leave out for each tree to compute metric on. + n_jobs : int, default=None The number of jobs to run in parallel. @@ -392,11 +422,13 @@ class PermutationForestClassifier(BasePermutationForest): def __init__( self, estimator=None, + test_size=0.2, random_state=None, verbose=0, ): super().__init__( estimator=estimator, + test_size=test_size, random_state=random_state, verbose=verbose, ) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 3366dc1ea..9f5f0f384 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -62,9 +62,9 @@ "permute_per_tree": True, "sample_dataset_per_tree": True, }, - 300, - 500, - 0.1, + 300, # n_samples + 500, # n_repeats + 0.1, # test_size ], ], ) @@ -94,24 +94,22 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) # compute final y of (n_samples,) y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon - est = hypotester(**model_kwargs) + est = hypotester(test_size=test_size, **model_kwargs) # test for X_1 - stat, pvalue = est.test(X, y, [0], metric=metric, test_size=test_size, n_repeats=n_repeats) + stat, pvalue = est.test(X, y, [0], metric=metric, n_repeats=n_repeats) print("X1: ", pvalue) assert pvalue < 
0.05, f"pvalue: {pvalue}" # test for X_6 - stat, pvalue = est.test(X, y, [5], metric=metric, test_size=test_size, n_repeats=n_repeats) + stat, pvalue = est.test(X, y, [5], metric=metric, n_repeats=n_repeats) print("X6: ", pvalue) assert pvalue < 0.05, f"pvalue: {pvalue}" # test for a few unimportant other X for covariate_index in [1, 6]: # test for X_2, X_7 - stat, pvalue = est.test( - X, y, [covariate_index], metric=metric, test_size=test_size, n_repeats=n_repeats - ) + stat, pvalue = est.test(X, y, [covariate_index], metric=metric, n_repeats=n_repeats) print("X2/7: ", pvalue) assert pvalue > 0.05, f"pvalue: {pvalue}" @@ -188,26 +186,20 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, assert y_proba.shape == (n_samples,) y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) - est = hypotester(**model_kwargs) + est = hypotester(test_size=test_size, **model_kwargs) # test for X_2 important - stat, pvalue = est.test( - X.copy(), y.copy(), [1], test_size=test_size, n_repeats=n_repeats, metric=metric - ) + stat, pvalue = est.test(X.copy(), y.copy(), [1], n_repeats=n_repeats, metric=metric) print("X2: ", pvalue) assert pvalue < 0.05, f"pvalue: {pvalue}" # test for X_1 unimportant - stat, pvalue = est.test( - X.copy(), y.copy(), [0], test_size=test_size, n_repeats=n_repeats, metric=metric - ) + stat, pvalue = est.test(X.copy(), y.copy(), [0], n_repeats=n_repeats, metric=metric) print("X1: ", pvalue) assert pvalue > 0.05, f"pvalue: {pvalue}" # test for X_500 unimportant - stat, pvalue = est.test( - X.copy(), y.copy(), [n - 1], test_size=test_size, n_repeats=n_repeats, metric=metric - ) + stat, pvalue = est.test(X.copy(), y.copy(), [n - 1], n_repeats=n_repeats, metric=metric) print("X500: ", pvalue) assert pvalue > 0.05, f"pvalue: {pvalue}" @@ -228,6 +220,7 @@ def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): max_features = "sqrt" n_repeats = 200 n_estimators = 100 + test_size = 0.1 # Check consistency on 
dataset iris. clf = FeatureImportanceForestClassifier( @@ -240,6 +233,7 @@ def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): random_state=0, n_jobs=-1, ), + test_size=test_size, sample_dataset_per_tree=True, permute_per_tree=True, ) @@ -247,23 +241,19 @@ def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): X = np.hstack((iris_X, rng.standard_normal(size=(iris_X.shape[0], 4)))) # test for unimportant feature set - test_size = 0.1 clf.reset() stat, pvalue = clf.test( X, iris_y, np.arange(iris_X.shape[0], X.shape[1], dtype=int).tolist(), n_repeats=n_repeats, - test_size=test_size, metric="auc", ) print(pvalue) assert pvalue > 0.05, f"pvalue: {pvalue}" # test for important features that are permuted - stat, pvalue = clf.test( - X, iris_y, [0, 1, 2, 3], n_repeats=n_repeats, test_size=test_size, metric="auc" - ) + stat, pvalue = clf.test(X, iris_y, [0, 1, 2, 3], n_repeats=n_repeats, metric="auc") print(pvalue) assert pvalue < 0.05, f"pvalue: {pvalue}" From 2fa68cea2353ca72043aaff4bb30b223cb4a6762 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 16:02:04 -0400 Subject: [PATCH 27/70] Working clean code Signed-off-by: Adam Li --- .spin/cmds.py | 1 - .../test_permutation_forest.ipynb | 258 +++--------------- sktree/stats/forestht.py | 6 +- 3 files changed, 33 insertions(+), 232 deletions(-) diff --git a/.spin/cmds.py b/.spin/cmds.py index 30380208d..5b8585dde 100644 --- a/.spin/cmds.py +++ b/.spin/cmds.py @@ -36,7 +36,6 @@ def docs(ctx, build_dir, clean=False, noplot=False): @click.command() -@click.option("--runslow", help="Run slow tests.") @click.pass_context def coverage(ctx): """📊 Generate coverage report""" diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index e8859626e..3ef50202d 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - 
"execution_count": 17, + "execution_count": null, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -88,6 +88,7 @@ " RandomForestRegressor(\n", " max_features=1.0, random_state=seed, n_estimators=n_estimators, n_jobs=-1\n", " ),\n", + " test_size=test_size,\n", " random_state=seed,\n", " permute_per_tree=permute_per_tree,\n", " sample_dataset_per_tree=sample_dataset_per_tree,\n", @@ -96,7 +97,7 @@ "\n", " # test for X_1\n", " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [0], metric=metric, n_repeats=n_repeats, test_size=test_size\n", + " X.copy(), y.copy(), [0], metric=metric, n_repeats=n_repeats\n", " )\n", " print(\"X1: \", pvalue)\n", " pvalue_dict[\"X1\"] = pvalue\n", @@ -104,7 +105,7 @@ "\n", " # test for X_6\n", " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [5], metric=metric, n_repeats=n_repeats, test_size=test_size\n", + " X.copy(), y.copy(), [5], metric=metric, n_repeats=n_repeats\n", " )\n", " print(\"X6: \", pvalue)\n", " pvalue_dict[\"X6\"] = pvalue\n", @@ -119,7 +120,6 @@ " [covariate_index],\n", " metric=metric,\n", " n_repeats=n_repeats,\n", - " test_size=test_size,\n", " )\n", " print(\"X2/7: \", pvalue)\n", " pvalue_dict[name] = pvalue\n", @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -196,6 +196,7 @@ " n_jobs=-1,\n", " ),\n", " random_state=seed,\n", + " test_size=test_size,\n", " permute_per_tree=permute_per_tree,\n", " sample_dataset_per_tree=sample_dataset_per_tree,\n", " )\n", @@ -205,7 +206,6 @@ " X.copy(),\n", " y.copy(),\n", " covariate_index=[1],\n", - " test_size=test_size,\n", " n_repeats=n_repeats,\n", " metric=metric,\n", " )\n", @@ -215,7 +215,7 @@ "\n", " # test for X_1 unimportant\n", " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [0], test_size=test_size, n_repeats=n_repeats, metric=metric\n", + " X.copy(), y.copy(), [0], n_repeats=n_repeats, 
metric=metric\n", " )\n", " pvalue_dict[\"X1\"] = pvalue\n", " print(\"X1: \", pvalue)\n", @@ -226,7 +226,6 @@ " X.copy(),\n", " y.copy(),\n", " [n - 1],\n", - " test_size=test_size,\n", " n_repeats=n_repeats,\n", " metric=metric,\n", " )\n", @@ -614,82 +613,14 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 29, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 
0.004975124378109453\n", - "X500: 0.004975124378109453\n", "X2: 0.004975124378109453\n", "X1: 0.004975124378109453\n", "X500: 0.004975124378109453\n", @@ -699,156 +630,28 @@ "X2: 0.004975124378109453\n", "X1: 1.0\n", "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 
0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - 
"X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n" + "X1: 1.0\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[29], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m idx \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m5\u001b[39m):\n\u001b[1;32m 8\u001b[0m new_seed \u001b[38;5;241m=\u001b[39m rng\u001b[38;5;241m.\u001b[39mintegers(\u001b[38;5;241m0\u001b[39m, np\u001b[38;5;241m.\u001b[39miinfo(np\u001b[38;5;241m.\u001b[39muint32)\u001b[38;5;241m.\u001b[39mmax, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39muint32)\n\u001b[0;32m---> 10\u001b[0m elements_dict \u001b[38;5;241m=\u001b[39m \u001b[43mcorrelated_logit_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbeta\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_seed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m elements_dict\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 12\u001b[0m pvalue_dict[key]\u001b[38;5;241m.\u001b[39mappend(value)\n", + "Cell \u001b[0;32mIn[18], line 80\u001b[0m, in \u001b[0;36mcorrelated_logit_model\u001b[0;34m(beta, seed)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX1: \u001b[39m\u001b[38;5;124m\"\u001b[39m, pvalue)\n\u001b[1;32m 77\u001b[0m \u001b[38;5;66;03m# assert pvalue > 0.05, f\"pvalue: {pvalue}\"\u001b[39;00m\n\u001b[1;32m 78\u001b[0m \n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# test for X_500 unimportant\u001b[39;00m\n\u001b[0;32m---> 80\u001b[0m stat, pvalue \u001b[38;5;241m=\u001b[39m 
\u001b[43mest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 81\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 82\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 83\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mn\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_repeats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_repeats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m pvalue_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX500\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m pvalue\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX500: \u001b[39m\u001b[38;5;124m\"\u001b[39m, pvalue)\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/forestht.py:297\u001b[0m, in \u001b[0;36mBaseForestHT.test\u001b[0;34m(self, X, y, covariate_index, metric, test_size, n_repeats, return_posteriors, **metric_kwargs)\u001b[0m\n\u001b[1;32m 294\u001b[0m observe_stat \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstat_\n\u001b[1;32m 296\u001b[0m \u001b[38;5;66;03m# next 
permute the data\u001b[39;00m\n\u001b[0;32m--> 297\u001b[0m permute_stat, permute_posteriors, permute_samples \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstatistic\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mcovariate_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcovariate_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_posteriors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmetric_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;66;03m# Note: at this point, both `estimator` and `permuted_estimator_` should\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;66;03m# have been fitted already, so we can now compute on the null by resampling\u001b[39;00m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;66;03m# the posteriors and computing the test statistic on the resampled posteriors\u001b[39;00m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msample_dataset_per_tree:\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/forestht.py:211\u001b[0m, in 
\u001b[0;36mBaseForestHT.statistic\u001b[0;34m(self, X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m estimator\u001b[38;5;241m.\u001b[39mn_outputs_ \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m metric \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauc\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAUC metric is not supported for multi-output\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_statistic\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[43mcovariate_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcovariate_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 217\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_posteriors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_posteriors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmetric_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/forestht.py:662\u001b[0m, 
in \u001b[0;36mFeatureImportanceForestClassifier._statistic\u001b[0;34m(self, estimator, X, y, covariate_index, metric, return_posteriors, **metric_kwargs)\u001b[0m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m idx, (indices_train, indices_test) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_estimators_indices()):\n\u001b[1;32m 661\u001b[0m tree: DecisionTreeClassifier \u001b[38;5;241m=\u001b[39m estimator\u001b[38;5;241m.\u001b[39mestimators_[idx]\n\u001b[0;32m--> 662\u001b[0m \u001b[43mtrain_tree\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtree\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindices_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindices_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcovariate_index\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 664\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m predict_posteriors:\n\u001b[1;32m 665\u001b[0m \u001b[38;5;66;03m# XXX: currently assumes n_outputs_ == 1\u001b[39;00m\n\u001b[1;32m 666\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m tree\u001b[38;5;241m.\u001b[39mpredict_proba(X[indices_test, :])\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/utils.py:72\u001b[0m, in \u001b[0;36mtrain_tree\u001b[0;34m(tree, X, y, covariate_index)\u001b[0m\n\u001b[1;32m 69\u001b[0m X[:, covariate_index] \u001b[38;5;241m=\u001b[39m perm_X_cov\n\u001b[1;32m 71\u001b[0m \u001b[38;5;66;03m# individual tree permutation of y labels\u001b[39;00m\n\u001b[0;32m---> 72\u001b[0m \u001b[43mtree\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1144\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[1;32m 1146\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m 1147\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 1148\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m 1149\u001b[0m )\n\u001b[1;32m 1150\u001b[0m ):\n\u001b[0;32m-> 1151\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/_lib/./sklearn/tree/_classes.py:1270\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[0;34m(self, X, y, sample_weight, check_input, classes)\u001b[0m\n\u001b[1;32m 1230\u001b[0m \u001b[38;5;129m@_fit_context\u001b[39m(prefer_skip_nested_validation\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1231\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfit\u001b[39m(\n\u001b[1;32m 1232\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1238\u001b[0m ):\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;250m 
\u001b[39m\u001b[38;5;124;03m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[1;32m 1240\u001b[0m \n\u001b[1;32m 1241\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;124;03m Fitted estimator.\u001b[39;00m\n\u001b[1;32m 1269\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1270\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1271\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1272\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1273\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1274\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1275\u001b[0m \u001b[43m \u001b[49m\u001b[43mclasses\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclasses\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1276\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1277\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/_lib/./sklearn/tree/_classes.py:421\u001b[0m, in \u001b[0;36mBaseDecisionTree._fit\u001b[0;34m(self, X, y, sample_weight, check_input, missing_values_in_feature_mask, classes)\u001b[0m\n\u001b[1;32m 418\u001b[0m min_weight_leaf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmin_weight_fraction_leaf \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39msum(sample_weight)\n\u001b[1;32m 420\u001b[0m \u001b[38;5;66;03m# build the actual tree now with the 
parameters\u001b[39;00m\n\u001b[0;32m--> 421\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_build_tree\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 422\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 423\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 424\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 425\u001b[0m \u001b[43m \u001b[49m\u001b[43mmissing_values_in_feature_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmissing_values_in_feature_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 426\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_samples_leaf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_samples_leaf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_weight_leaf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_weight_leaf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 428\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_leaf_nodes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_leaf_nodes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 429\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_samples_split\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_samples_split\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 430\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_depth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_depth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 431\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 432\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 434\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m~/Documents/scikit-tree/sktree/_lib/./sklearn/tree/_classes.py:572\u001b[0m, in \u001b[0;36mBaseDecisionTree._build_tree\u001b[0;34m(self, X, y, sample_weight, missing_values_in_feature_mask, min_samples_leaf, min_weight_leaf, max_leaf_nodes, min_samples_split, max_depth, random_state)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuilder_ \u001b[38;5;241m=\u001b[39m BestFirstTreeBuilder(\n\u001b[1;32m 563\u001b[0m splitter,\n\u001b[1;32m 564\u001b[0m min_samples_split,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore_leaf_values,\n\u001b[1;32m 571\u001b[0m )\n\u001b[0;32m--> 572\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuilder_\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 573\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtree_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmissing_values_in_feature_mask\u001b[49m\n\u001b[1;32m 574\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_outputs_ \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m is_classifier(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 577\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_classes_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_classes_[\u001b[38;5;241m0\u001b[39m]\n", + 
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], @@ -857,6 +660,7 @@ "rng = np.random.default_rng(seed)\n", "\n", "beta_space = np.hstack((np.linspace(0.01, 2.5, 8), np.linspace(5, 20, 7)))\n", + "beta_space = j_space.copy()\n", "for beta in beta_space:\n", " for idx in range(5):\n", " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 9393ec91f..67c20a89e 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -178,12 +178,12 @@ def statistic( for axis in range(y.shape[1]): _unique_y.append(np.unique(y[:, axis])) unique_y = np.hstack(_unique_y) - if unique_y.shape[1] == 1: + if unique_y.ndim > 1 and unique_y.shape[1] == 1: unique_y = unique_y.ravel() X_dummy = np.zeros((unique_y.shape[0], X.shape[1])) estimator.fit(X_dummy, unique_y) elif not _is_fitted(estimator): - if y.shape[1] == 1: + if y.ndim > 1 and y.shape[1] == 1: estimator.fit(X[:2], y[:2].ravel()) else: estimator.fit(X[:2], y[:2]) @@ -475,7 +475,6 @@ def _statistic( y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) # Fill test set posteriors & set rest NaN - print(posterior_arr.shape, y_pred.shape) posterior_arr[idx, indices_test, :] = y_pred # posterior y_true_final = y[indices_test, :] @@ -669,7 +668,6 @@ def _statistic( y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) # Fill test set posteriors & set rest NaN - print(posterior_arr.shape, y_pred.shape) posterior_arr[idx, indices_test, :] = y_pred # posterior # Average all posteriors (n_samples_test, n_outputs) From 7fbdd3813945cafb65372545900ff84e2fcaed6f Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 16:02:57 -0400 Subject: [PATCH 28/70] Working clean code Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 31 ++++++++++--------- sktree/stats/forestht.py | 7 ----- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb 
b/benchmarks_nonasv/test_permutation_forest.ipynb index 3ef50202d..ebf09d426 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 15, + "execution_count": 30, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, "outputs": [], @@ -27,10 +27,19 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -38,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -96,17 +105,13 @@ " pvalue_dict = {}\n", "\n", " # test for X_1\n", - " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [0], metric=metric, n_repeats=n_repeats\n", - " )\n", + " stat, pvalue = est.test(X.copy(), y.copy(), [0], metric=metric, n_repeats=n_repeats)\n", " print(\"X1: \", pvalue)\n", " pvalue_dict[\"X1\"] = pvalue\n", " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", "\n", " # test for X_6\n", - " stat, pvalue = est.test(\n", - " X.copy(), y.copy(), [5], metric=metric, n_repeats=n_repeats\n", - " )\n", + " stat, pvalue = est.test(X.copy(), y.copy(), [5], metric=metric, n_repeats=n_repeats)\n", " print(\"X6: \", pvalue)\n", " pvalue_dict[\"X6\"] = pvalue\n", " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", @@ -138,7 +143,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -214,9 +219,7 @@ " # assert pvalue < 0.05, f\"pvalue: {pvalue}\"\n", "\n", " # test for X_1 unimportant\n", - " stat, 
pvalue = est.test(\n", - " X.copy(), y.copy(), [0], n_repeats=n_repeats, metric=metric\n", - " )\n", + " stat, pvalue = est.test(X.copy(), y.copy(), [0], n_repeats=n_repeats, metric=metric)\n", " pvalue_dict[\"X1\"] = pvalue\n", " print(\"X1: \", pvalue)\n", " # assert pvalue > 0.05, f\"pvalue: {pvalue}\"\n", diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 67c20a89e..df37c7b62 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -224,7 +224,6 @@ def test( y, covariate_index: ArrayLike, metric: str = "mse", - test_size: float = 0.2, n_repeats: int = 1000, return_posteriors: bool = False, **metric_kwargs, @@ -271,12 +270,6 @@ def test( if y.ndim != 2: y = y.reshape(-1, 1) - # indices = np.arange(X.shape[0]) - # self.test_size_ = int(test_size * X.shape[0]) - # indices_train, indices_test = train_test_split(indices, test_size=test_size, shuffle=True) - # self.indices_train_ = indices_train - # self.indices_test_ = indices_test - if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data observe_stat, observe_posteriors, observe_samples = self.statistic( From 918f9348da7b338b51d787be7a9855234d758c01 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 16:15:11 -0400 Subject: [PATCH 29/70] Working clean code Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 175 ++++-------------- sktree/stats/tests/test_forestht.py | 2 +- 2 files changed, 33 insertions(+), 144 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index ebf09d426..779347c0d 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 30, + "execution_count": 1, "id": "b658bdd8-a3e6-4051-9d66-f2a153113234", "metadata": {}, "outputs": [], @@ -27,19 +27,10 @@ }, { "cell_type": "code", - "execution_count": 31, + 
"execution_count": 2, "id": "05b0b53e-0525-45ce-9f7e-0322a30221cf", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -47,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 3, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -143,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 4, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -597,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 6, "id": "36c53ff3-984d-4428-87c1-3421098e0081", "metadata": {}, "outputs": [ @@ -611,12 +602,13 @@ } ], "source": [ + "j_space = np.linspace(0.005, 2.25, 9)\n", "print(j_space)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", "metadata": {}, "outputs": [ @@ -626,35 +618,31 @@ "text": [ "X2: 0.004975124378109453\n", "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", + "X500: 1.0\n", "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", "X1: 1.0\n", "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", "X2: 0.004975124378109453\n", "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", "X500: 0.004975124378109453\n", "X2: 1.0\n", - "X1: 1.0\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell 
\u001b[0;32mIn[29], line 10\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m idx \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m5\u001b[39m):\n\u001b[1;32m 8\u001b[0m new_seed \u001b[38;5;241m=\u001b[39m rng\u001b[38;5;241m.\u001b[39mintegers(\u001b[38;5;241m0\u001b[39m, np\u001b[38;5;241m.\u001b[39miinfo(np\u001b[38;5;241m.\u001b[39muint32)\u001b[38;5;241m.\u001b[39mmax, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39muint32)\n\u001b[0;32m---> 10\u001b[0m elements_dict \u001b[38;5;241m=\u001b[39m \u001b[43mcorrelated_logit_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbeta\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnew_seed\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m elements_dict\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 12\u001b[0m pvalue_dict[key]\u001b[38;5;241m.\u001b[39mappend(value)\n", - "Cell \u001b[0;32mIn[18], line 80\u001b[0m, in \u001b[0;36mcorrelated_logit_model\u001b[0;34m(beta, seed)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX1: \u001b[39m\u001b[38;5;124m\"\u001b[39m, pvalue)\n\u001b[1;32m 77\u001b[0m \u001b[38;5;66;03m# assert pvalue > 0.05, f\"pvalue: {pvalue}\"\u001b[39;00m\n\u001b[1;32m 78\u001b[0m \n\u001b[1;32m 79\u001b[0m \u001b[38;5;66;03m# test for X_500 unimportant\u001b[39;00m\n\u001b[0;32m---> 80\u001b[0m stat, pvalue \u001b[38;5;241m=\u001b[39m \u001b[43mest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 81\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 82\u001b[0m \u001b[43m 
\u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 83\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mn\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mtest_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_repeats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_repeats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 87\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m pvalue_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX500\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m pvalue\n\u001b[1;32m 89\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX500: \u001b[39m\u001b[38;5;124m\"\u001b[39m, pvalue)\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/forestht.py:297\u001b[0m, in \u001b[0;36mBaseForestHT.test\u001b[0;34m(self, X, y, covariate_index, metric, test_size, n_repeats, return_posteriors, **metric_kwargs)\u001b[0m\n\u001b[1;32m 294\u001b[0m observe_stat \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstat_\n\u001b[1;32m 296\u001b[0m \u001b[38;5;66;03m# next permute the data\u001b[39;00m\n\u001b[0;32m--> 297\u001b[0m permute_stat, permute_posteriors, permute_samples \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstatistic\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mcovariate_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcovariate_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_posteriors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmetric_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;66;03m# Note: at this point, both `estimator` and `permuted_estimator_` should\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[38;5;66;03m# have been fitted already, so we can now compute on the null by resampling\u001b[39;00m\n\u001b[1;32m 309\u001b[0m \u001b[38;5;66;03m# the posteriors and computing the test statistic on the resampled posteriors\u001b[39;00m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msample_dataset_per_tree:\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/forestht.py:211\u001b[0m, in \u001b[0;36mBaseForestHT.statistic\u001b[0;34m(self, X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m estimator\u001b[38;5;241m.\u001b[39mn_outputs_ \u001b[38;5;241m>\u001b[39m 
\u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m metric \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mauc\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAUC metric is not supported for multi-output\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_statistic\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[43mcovariate_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcovariate_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetric\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 217\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_posteriors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_posteriors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmetric_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/forestht.py:662\u001b[0m, in \u001b[0;36mFeatureImportanceForestClassifier._statistic\u001b[0;34m(self, estimator, X, y, covariate_index, metric, return_posteriors, **metric_kwargs)\u001b[0m\n\u001b[1;32m 660\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m idx, (indices_train, indices_test) 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_estimators_indices()):\n\u001b[1;32m 661\u001b[0m tree: DecisionTreeClassifier \u001b[38;5;241m=\u001b[39m estimator\u001b[38;5;241m.\u001b[39mestimators_[idx]\n\u001b[0;32m--> 662\u001b[0m \u001b[43mtrain_tree\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtree\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindices_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m[\u001b[49m\u001b[43mindices_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcovariate_index\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 664\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m predict_posteriors:\n\u001b[1;32m 665\u001b[0m \u001b[38;5;66;03m# XXX: currently assumes n_outputs_ == 1\u001b[39;00m\n\u001b[1;32m 666\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m tree\u001b[38;5;241m.\u001b[39mpredict_proba(X[indices_test, :])\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/stats/utils.py:72\u001b[0m, in \u001b[0;36mtrain_tree\u001b[0;34m(tree, X, y, covariate_index)\u001b[0m\n\u001b[1;32m 69\u001b[0m X[:, covariate_index] \u001b[38;5;241m=\u001b[39m perm_X_cov\n\u001b[1;32m 71\u001b[0m \u001b[38;5;66;03m# individual tree permutation of y labels\u001b[39;00m\n\u001b[0;32m---> 72\u001b[0m \u001b[43mtree\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1144\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[1;32m 1146\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m 1147\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 1148\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m 1149\u001b[0m )\n\u001b[1;32m 1150\u001b[0m ):\n\u001b[0;32m-> 1151\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/_lib/./sklearn/tree/_classes.py:1270\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[0;34m(self, X, y, sample_weight, check_input, classes)\u001b[0m\n\u001b[1;32m 1230\u001b[0m \u001b[38;5;129m@_fit_context\u001b[39m(prefer_skip_nested_validation\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 1231\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfit\u001b[39m(\n\u001b[1;32m 1232\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1238\u001b[0m ):\n\u001b[1;32m 1239\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[1;32m 1240\u001b[0m \n\u001b[1;32m 1241\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m 
(...)\u001b[0m\n\u001b[1;32m 1268\u001b[0m \u001b[38;5;124;03m Fitted estimator.\u001b[39;00m\n\u001b[1;32m 1269\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1270\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1271\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1272\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1273\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1274\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1275\u001b[0m \u001b[43m \u001b[49m\u001b[43mclasses\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclasses\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1276\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1277\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/_lib/./sklearn/tree/_classes.py:421\u001b[0m, in \u001b[0;36mBaseDecisionTree._fit\u001b[0;34m(self, X, y, sample_weight, check_input, missing_values_in_feature_mask, classes)\u001b[0m\n\u001b[1;32m 418\u001b[0m min_weight_leaf \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmin_weight_fraction_leaf \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39msum(sample_weight)\n\u001b[1;32m 420\u001b[0m \u001b[38;5;66;03m# build the actual tree now with the parameters\u001b[39;00m\n\u001b[0;32m--> 421\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_build_tree\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 422\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 423\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 424\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 425\u001b[0m \u001b[43m \u001b[49m\u001b[43mmissing_values_in_feature_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmissing_values_in_feature_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 426\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_samples_leaf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_samples_leaf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_weight_leaf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_weight_leaf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 428\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_leaf_nodes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_leaf_nodes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 429\u001b[0m \u001b[43m \u001b[49m\u001b[43mmin_samples_split\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmin_samples_split\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 430\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_depth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_depth\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 431\u001b[0m \u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrandom_state\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 432\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 434\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", - "File \u001b[0;32m~/Documents/scikit-tree/sktree/_lib/./sklearn/tree/_classes.py:572\u001b[0m, in \u001b[0;36mBaseDecisionTree._build_tree\u001b[0;34m(self, X, y, sample_weight, 
missing_values_in_feature_mask, min_samples_leaf, min_weight_leaf, max_leaf_nodes, min_samples_split, max_depth, random_state)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuilder_ \u001b[38;5;241m=\u001b[39m BestFirstTreeBuilder(\n\u001b[1;32m 563\u001b[0m splitter,\n\u001b[1;32m 564\u001b[0m min_samples_split,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstore_leaf_values,\n\u001b[1;32m 571\u001b[0m )\n\u001b[0;32m--> 572\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuilder_\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 573\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtree_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmissing_values_in_feature_mask\u001b[49m\n\u001b[1;32m 574\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 576\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_outputs_ \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m is_classifier(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 577\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_classes_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_classes_[\u001b[38;5;241m0\u001b[39m]\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n" ] } ], @@ -678,129 +666,30 @@ }, { "cell_type": "code", - 
"execution_count": 23, + "execution_count": null, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "done\n" - ] - } - ], + "outputs": [], "source": [ "print(\"done\")" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
X2X1X500sigma_factor
01.01.0000001.0000000.01
11.01.0000001.0000000.01
21.00.0049750.0049750.01
31.01.0000001.0000000.01
41.00.0049750.0049750.01
\n", - "
" - ], - "text/plain": [ - " X2 X1 X500 sigma_factor\n", - "0 1.0 1.000000 1.000000 0.01\n", - "1 1.0 1.000000 1.000000 0.01\n", - "2 1.0 0.004975 0.004975 0.01\n", - "3 1.0 1.000000 1.000000 0.01\n", - "4 1.0 0.004975 0.004975 0.01" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(df.head())" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 9f5f0f384..eca0b21c5 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -140,7 +140,7 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) "estimator": RandomForestClassifier( max_features="sqrt", random_state=seed, - n_estimators=125, + n_estimators=150, n_jobs=-1, ), "random_state": seed, From bd02877c022faaa252b2a0fd1219e778b156f28f Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 17:37:05 -0400 Subject: [PATCH 30/70] Fixed unit-tests Signed-off-by: Adam Li --- .../test_permutation_forest.ipynb | 227 +++++++++++++++++- sktree/stats/tests/test_forestht.py | 2 +- 2 files changed, 219 insertions(+), 10 deletions(-) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/test_permutation_forest.ipynb index 779347c0d..e2dc525b7 100644 --- a/benchmarks_nonasv/test_permutation_forest.ipynb +++ b/benchmarks_nonasv/test_permutation_forest.ipynb @@ -608,9 +608,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stdout", @@ -642,7 +644,115 @@ "X500: 1.0\n", "X2: 1.0\n", "X1: 0.004975124378109453\n", - "X500: 1.0\n" + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 1.0\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 
0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 0.004975124378109453\n", + "X2: 
0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 1.0\n", + "X500: 1.0\n", + "X2: 0.004975124378109453\n", + "X1: 0.004975124378109453\n", + "X500: 0.004975124378109453\n" ] } ], @@ -666,30 +776,129 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n" + ] + } + ], "source": [ "print(\"done\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X2X1X500sigma_factor
00.0049750.0049751.0000000.005
10.0049750.0049751.0000000.005
21.0000001.0000001.0000000.005
31.0000000.0049751.0000000.005
41.0000001.0000000.0049750.005
\n", + "
" + ], + "text/plain": [ + " X2 X1 X500 sigma_factor\n", + "0 0.004975 0.004975 1.000000 0.005\n", + "1 0.004975 0.004975 1.000000 0.005\n", + "2 1.000000 1.000000 1.000000 0.005\n", + "3 1.000000 0.004975 1.000000 0.005\n", + "4 1.000000 1.000000 0.004975 0.005" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display(df.head())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index eca0b21c5..f015a0273 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -140,7 +140,7 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) "estimator": RandomForestClassifier( max_features="sqrt", random_state=seed, - n_estimators=150, + n_estimators=100, n_jobs=-1, ), "random_state": seed, From 1eb8604f04c7f04dc83743bd936cf0b4e2ab25f6 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 18:51:44 -0400 Subject: [PATCH 31/70] Adding coverage Signed-off-by: Adam Li --- ...t_MI_gigantic_hypothesis_testing_forest.py | 118 ++++++++++++++++++ sktree/stats/forestht.py | 6 + sktree/stats/permutationforest.py | 3 - sktree/stats/tests/test_forestht.py | 59 ++++++++- sktree/stats/utils.py | 23 +++- sktree/tree/__init__.py | 3 + sktree/tree/_honest_tree.py | 30 ++++- 7 files changed, 228 insertions(+), 14 deletions(-) create mode 100644 examples/plot_MI_gigantic_hypothesis_testing_forest.py diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py new file mode 100644 index 000000000..2af3e4412 --- /dev/null +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -0,0 +1,118 @@ +""" +=========================================================== +Mutual Information for Gigantic Hypothesis Testing (MIGHT) +=========================================================== + +An example using :class:`~sktree.FeatureImportanceForestClassifier` for nonparametric +multivariate hypothesis test, on simulated datasets. Here, we present a simulation +of how MIGHT is used to test the hypothesis that a "feature set is important for +predicting the target". 
+ +We simulate a dataset with 1000 features, 500 samples, and a binary class target +variable. Within each feature set, there is 500 features associated with one feature +set, and another 500 features associated with another feature set. One could think of +these for example as different datasets collected on the same patient in a biomedical setting. +The first feature set (X) is strongly correlated with the target, and the second +feature set (W) is weakly correlated with the target (y). Here, we are testing the +null hypothesis: + +``H0: I(X; y) - I(X, W; y) = 0`` +``HA: I(X; y) - I(X, W; y) > 0`` + +where ``I`` is mutual information. + +We present causal settings where this would be true: + +- ``W X -> y``: here ``W`` is completely disconnected from X and y. +- ``W -> X -> y``: here ``W`` is d-separated from y given X. +- ``W -> y <- X``: here ``W`` is a weak predictor of y, and X is a strong predictor of y. +- ``X <- W -> y; X -> y``: here ``W`` is a weak confounder of the relationship between X and y. + +We then use MIGHT to test the hypothesis that the first feature set is important for +predicting the target, and the second feature set is not important for predicting the +target. We use :class:`~sktree.FeatureImportanceForestClassifier`. +""" + +import numpy as np +from scipy.special import expit + +from sktree import HonestForestClassifier +from sktree.stats import FeatureImportanceForestClassifier +from sktree.tree import DecisionTreeClassifier + +seed = 12345 +rng = np.random.default_rng(seed) + +# %% +# Simulate data +# ------------- +# We simulate the two feature sets, and the target variable. We then combine them +# into a single dataset to perform hypothesis testing. 
+ +n_samples = 1000 +n_features_set = 500 +mean = 1.0 +sigma = 2.0 +beta = 5.0 + +unimportant_mean = 0.0 +unimportant_sigma = 4.5 + +# first sample the informative features, and then the uniformative features +X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10)) +X_important = np.hstack( + [ + X_important, + rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10) + ), + ] +) + +X_unimportant = rng.normal( + loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set) +) +X = np.hstack([X_important, X_unimportant]) + +# simulate the binary target variable +y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples) + +# %% +# Perform hypothesis testing using Mutual Information +# --------------------------------------------------- +n_estimators = 100 +max_features = 1.0 +test_size = 0.2 +n_repeats = 500 + +# TODO: This can be improved since HonestForestClassifier should be able to extract +# the relevant hyperparameters +est = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=n_estimators, + max_features=max_features, + tree_estimator=DecisionTreeClassifier(), + random_state=seed, + honest_fraction=0.7, + ), + random_state=seed, + test_size=test_size, + permute_per_tree=True, + sample_dataset_per_tree=True, +) + +# we test for the first feature set, which is important and thus should return a pvalue < 0.05 +stat, pvalue = est.test( + X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats +) +print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") + +# we test for the second feature set, which is important and thus should return a pvalue > 0.05 +stat, pvalue = est.test( + X, + y, + covariate_index=np.arange(n_features_set, dtype=int) + n_features_set, + metric="mi", + n_repeats=n_repeats, +) +print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") diff --git 
a/sktree/stats/forestht.py b/sktree/stats/forestht.py index df37c7b62..57ab24ea4 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -377,6 +377,9 @@ class FeatureImportanceForestRegressor(BaseForestHT): verbose : int, default=0 Controls the verbosity when fitting and predicting. + test_size : float, default=0.2 + Proportion of samples per tree to use for the test set. + permute_per_tree : bool, default=True Whether to permute the covariate index per tree or per forest. @@ -554,6 +557,9 @@ class FeatureImportanceForestClassifier(BaseForestHT): verbose : int, default=0 Controls the verbosity when fitting and predicting. + test_size : float, default=0.2 + Proportion of samples per tree to use for the test set. + permute_per_tree : bool, default=True Whether to permute the covariate index per tree or per forest. diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index 026b89eca..6512f28ae 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -387,9 +387,6 @@ class PermutationForestClassifier(BasePermutationForest): test_size : float, default=0.2 The proportion of samples to leave out for each tree to compute metric on. - n_jobs : int, default=None - The number of jobs to run in parallel. 
- random_state : int, RandomState instance or None, default=None Controls both the randomness of the bootstrapping of the samples used when building trees (if ``bootstrap=True``) and the sampling of the diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index f015a0273..ba020a403 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -29,6 +29,50 @@ iris_y = iris_y[p] +def test_featureimportance_forest_permute_pertree(): + est = FeatureImportanceForestClassifier( + estimator=RandomForestClassifier( + n_estimators=10, + ), + permute_per_tree=True, + sample_dataset_per_tree=True, + ) + est.statistic(iris_X[:10], iris_y[:10]) + + assert ( + len(est.train_test_samples_[0][1]) == 10 * est.test_size + ), f"{len(est.train_test_samples_[0][1])} {10 * est.test_size}" + assert len(est.train_test_samples_[0][0]) == est._n_samples_ - 10 * est.test_size + + est.test(iris_X[:10], iris_y[:10], [0, 1], n_repeats=10, metric="mse") + assert ( + len(est.train_test_samples_[0][1]) == 10 * est.test_size + ), f"{len(est.train_test_samples_[0][1])} {10 * est.test_size}" + assert len(est.train_test_samples_[0][0]) == est._n_samples_ - 10 * est.test_size + + +def test_featureimportance_forest_errors(): + permute_per_tree = False + sample_dataset_per_tree = True + est = FeatureImportanceForestClassifier( + estimator=RandomForestClassifier( + n_estimators=10, + ), + permute_per_tree=permute_per_tree, + sample_dataset_per_tree=sample_dataset_per_tree, + ) + with pytest.raises(ValueError, match="sample_dataset_per_tree"): + est.statistic(iris_X[:10], iris_y[:10]) + + est = FeatureImportanceForestClassifier(estimator=RandomForestRegressor) + with pytest.raises(RuntimeError, match="Estimator must be"): + est.statistic(iris_X[:10], iris_y[:10]) + + est = FeatureImportanceForestRegressor(estimator=RandomForestClassifier) + with pytest.raises(RuntimeError, match="Estimator must be"): + est.statistic(iris_X[:10], iris_y[:10]) 
+ + @flaky(max_runs=3) @pytest.mark.slowtest @pytest.mark.parametrize( @@ -215,8 +259,12 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, ObliqueDecisionTreeClassifier(), ], ) -@pytest.mark.parametrize("limit", [0.05, 0.1]) -def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): +@pytest.mark.parametrize("permute_per_tree", [True, False]) +@pytest.mark.parametrize("sample_dataset_per_tree", [True, False]) +def test_iris_pauc_statistic( + criterion, honest_prior, estimator, permute_per_tree, sample_dataset_per_tree +): + limit = 0.1 max_features = "sqrt" n_repeats = 200 n_estimators = 100 @@ -234,14 +282,17 @@ def test_iris_pauc_statistic(criterion, honest_prior, estimator, limit): n_jobs=-1, ), test_size=test_size, - sample_dataset_per_tree=True, - permute_per_tree=True, + sample_dataset_per_tree=sample_dataset_per_tree, + permute_per_tree=permute_per_tree, ) # now add completely uninformative feature X = np.hstack((iris_X, rng.standard_normal(size=(iris_X.shape[0], 4)))) # test for unimportant feature set clf.reset() + if sample_dataset_per_tree and not permute_per_tree: + # test in another test + pytest.skip() stat, pvalue = clf.test( X, iris_y, diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 9b1c3debe..9d06dc8fd 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -15,11 +15,30 @@ from sktree._lib.sklearn.tree import DecisionTreeClassifier -def _mutual_information(y_true, y_pred_proba): +def _mutual_information(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: + """Compute estimate of mutual information. + + Parameters + ---------- + y_true : ArrayLike of shape (n_samples,) + _description_ + y_pred_proba : ArrayLike of shape (n_samples, n_outputs) + Posterior probabilities. + + Returns + ------- + float : + The estimated MI. 
+ """ + if y_true.squeeze().ndim != 1: + raise ValueError(f"y_true must be 1d, not {y_true.shape}") + + # entropy averaged over n_samples H_YX = np.mean(entropy(y_pred_proba, base=np.exp(1), axis=1)) + # empirical count of each class (n_classes) _, counts = np.unique(y_true, return_counts=True) H_Y = entropy(counts, base=np.exp(1)) - return max(H_Y - H_YX, 0) + return H_Y - H_YX METRIC_FUNCTIONS = { diff --git a/sktree/tree/__init__.py b/sktree/tree/__init__.py index c4a706c99..be8baf5db 100644 --- a/sktree/tree/__init__.py +++ b/sktree/tree/__init__.py @@ -1,3 +1,4 @@ +from .._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from ._classes import ( ExtraObliqueDecisionTreeClassifier, ExtraObliqueDecisionTreeRegressor, @@ -22,4 +23,6 @@ "PatchObliqueDecisionTreeClassifier", "PatchObliqueDecisionTreeRegressor", "HonestTreeClassifier", + "DecisionTreeClassifier", + "DecisionTreeRegressor", ] diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 4ffa72c0f..43abe93fe 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -1,10 +1,9 @@ # Authors: Ronan Perry, Sambit Panda, Haoyin Xu # Adopted from: https://github.com/neurodata/honest-forests -from copy import deepcopy - import numpy as np -from sklearn.base import ClassifierMixin, MetaEstimatorMixin, _fit_context +from sklearn.base import ClassifierMixin, MetaEstimatorMixin, _fit_context, clone +from sklearn.ensemble._base import _set_random_states from sklearn.utils.multiclass import _check_partial_fit_first_call, check_classification_targets from sklearn.utils.validation import check_is_fitted, check_X_y @@ -536,7 +535,7 @@ def _fit( _sample_weight[self.honest_indices_] = 0 - if not self.tree_estimator: + if self.tree_estimator is None: self.estimator_ = DecisionTreeClassifier( criterion=self.criterion, splitter=self.splitter, @@ -555,7 +554,28 @@ def _fit( ) else: # XXX: maybe error out if the tree_estimator is already fitted - self.estimator_ = 
deepcopy(self.tree_estimator) + self.estimator_ = clone(self.tree_estimator) + self.estimator_.set_params( + **dict( + criterion=self.criterion, + splitter=self.splitter, + max_depth=self.max_depth, + min_samples_split=self.min_samples_split, + min_samples_leaf=self.min_samples_leaf, + min_weight_fraction_leaf=self.min_weight_fraction_leaf, + max_features=self.max_features, + max_leaf_nodes=self.max_leaf_nodes, + class_weight=self.class_weight, + random_state=self.random_state, + min_impurity_decrease=self.min_impurity_decrease, + ccp_alpha=self.ccp_alpha, + monotonic_cst=self.monotonic_cst, + store_leaf_values=self.store_leaf_values, + ) + ) + + if self.random_state is not None: + _set_random_states(self.estimator_, self.random_state) # Learn structure on subsample self.estimator_._fit( From 8c1d15c034948e4c23db2647a38b597ca78bca91 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 23:50:56 -0400 Subject: [PATCH 32/70] Updated example Signed-off-by: Adam Li --- ...t_MI_gigantic_hypothesis_testing_forest.py | 33 +++++++++++++------ sktree/stats/forestht.py | 20 +++++++---- sktree/stats/permutationforest.py | 3 -- sktree/stats/tests/test_forestht.py | 13 ++++++-- sktree/tree/_honest_tree.py | 18 ++++++++-- 5 files changed, 62 insertions(+), 25 deletions(-) diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index 2af3e4412..a8ed7fe1f 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -3,10 +3,11 @@ Mutual Information for Gigantic Hypothesis Testing (MIGHT) =========================================================== -An example using :class:`~sktree.FeatureImportanceForestClassifier` for nonparametric +An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric multivariate hypothesis test, on simulated datasets. 
Here, we present a simulation of how MIGHT is used to test the hypothesis that a "feature set is important for -predicting the target". +predicting the target". This is a generalization of the framework presented in +:footcite:`coleman2022scalable`. We simulate a dataset with 1000 features, 500 samples, and a binary class target variable. Within each feature set, there is 500 features associated with one feature @@ -19,18 +20,16 @@ ``H0: I(X; y) - I(X, W; y) = 0`` ``HA: I(X; y) - I(X, W; y) > 0`` -where ``I`` is mutual information. - -We present causal settings where this would be true: +where ``I`` is mutual information. For example, this could be true in the following settings, +where X is our informative feature set and W is our uninformative feature set. - ``W X -> y``: here ``W`` is completely disconnected from X and y. - ``W -> X -> y``: here ``W`` is d-separated from y given X. -- ``W -> y <- X``: here ``W`` is a weak predictor of y, and X is a strong predictor of y. -- ``X <- W -> y; X -> y``: here ``W`` is a weak confounder of the relationship between X and y. +- ``W <- X -> y``: here ``W`` is d-separated from y given X. We then use MIGHT to test the hypothesis that the first feature set is important for predicting the target, and the second feature set is not important for predicting the -target. We use :class:`~sktree.FeatureImportanceForestClassifier`. +target. We use :class:`~sktree.stats.FeatureImportanceForestClassifier`. """ import numpy as np @@ -80,13 +79,22 @@ # %% # Perform hypothesis testing using Mutual Information # --------------------------------------------------- +# Here, we use :class:`~sktree.stats.FeatureImportanceForestClassifier` to perform the hypothesis +# test. The test statistic is computed by comparing the metric (i.e. mutual information) estimated +# between two forests. 
One forest is trained on the original dataset, and one forest is trained +# on a permuted dataset, where the rows of the ``covariate_index`` columns are shuffled randomly. +# +# The null distribution is then estimated in an efficient manner using the framework of +# :footcite:`coleman2022scalable`. The sample evaluations of each forest (i.e. the posteriors) +# are sampled randomly ``n_repeats`` times to generate a null distribution. The pvalue is then +# computed as the proportion of samples in the null distribution that are less than the +# observed test statistic. + n_estimators = 100 max_features = 1.0 test_size = 0.2 n_repeats = 500 -# TODO: This can be improved since HonestForestClassifier should be able to extract -# the relevant hyperparameters est = FeatureImportanceForestClassifier( estimator=HonestForestClassifier( n_estimators=n_estimators, @@ -116,3 +124,8 @@ n_repeats=n_repeats, ) print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") + +# %% +# References +# ---------- +# .. footbibliography:: diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 57ab24ea4..9f8016684 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -250,8 +250,6 @@ def test( The index array of covariates to shuffle, by default None. metric : str, optional The metric to compute, by default "mse". - test_size : float, optional - Proportion of samples per tree to use for the test set, by default 0.2. n_repeats : int, optional Number of times to sample the null distribution, by default 1000. 
return_posteriors : bool, optional @@ -315,6 +313,8 @@ def test( y_test = y[indices_test, :] else: y_test = y + print("y_test: ", y_test.shape) + print(observe_posteriors.shape, permute_posteriors.shape) metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, y_pred_proba_normal=observe_posteriors, @@ -647,13 +647,13 @@ def _statistic( if self.permute_per_tree: if predict_posteriors: - posterior_arr = np.zeros( - (self.n_estimators, self.n_samples_test_, estimator.n_classes_) + posterior_arr = np.full( + (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan ) else: # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - posterior_arr = np.zeros( - (self.n_estimators, self.n_samples_test_, estimator.n_outputs_) + posterior_arr = np.full( + (self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan ) for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): @@ -667,6 +667,7 @@ def _statistic( y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) # Fill test set posteriors & set rest NaN + # TODO: refactor so posterior_arr is just a large NaN array posterior_arr[idx, indices_test, :] = y_pred # posterior # Average all posteriors (n_samples_test, n_outputs) @@ -679,6 +680,7 @@ def _statistic( # Ignore all NaN values (samples not tested) y_true_final = y[nonnan_indices, :] posterior_final = posterior_final[nonnan_indices, :] + print("Inside _statistic: ", y_true_final.shape, posterior_final.shape) else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] @@ -710,6 +712,9 @@ def _statistic( y_true_final = y_test posterior_final = y_pred + print("False false: ", posterior_final.shape, y_true_final.shape) + print(y_true_final) + print(posterior_final) if metric == "auc": # at this point, posterior_final is the predicted posterior for only the positive class # as more than one output is not 
supported. @@ -720,6 +725,9 @@ def _statistic( f"AUC metric is not supported for {self._type_of_target} targets." ) + if np.isnan(posterior_final).any(): + raise RuntimeError("NaN values encountered in posterior_final.") + stat = metric_func(y_true_final, posterior_final, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) diff --git a/sktree/stats/permutationforest.py b/sktree/stats/permutationforest.py index 6512f28ae..4a27f539f 100644 --- a/sktree/stats/permutationforest.py +++ b/sktree/stats/permutationforest.py @@ -215,9 +215,6 @@ def test( The covariate indices of ``X`` to shuffle. metric : str, optional Metric to compute, by default "mse". - test_size : float, optional - Size of the samples to leave out for each tree to compute posteriors on, - by default 0.2. n_repeats : int, optional Number of times to sample the null distribution, by default 1000. return_posteriors : bool, optional diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index ba020a403..18cc507e3 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -250,7 +250,7 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, @flaky(max_runs=3) @pytest.mark.parametrize("criterion", ["gini", "entropy"]) -@pytest.mark.parametrize("honest_prior", ["empirical", "uniform", "ignore"]) +@pytest.mark.parametrize("honest_prior", ["empirical", "uniform"]) @pytest.mark.parametrize( "estimator", [ @@ -259,7 +259,13 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, ObliqueDecisionTreeClassifier(), ], ) -@pytest.mark.parametrize("permute_per_tree", [True, False]) +@pytest.mark.parametrize( + "permute_per_tree", + [ + True, + False, + ], +) @pytest.mark.parametrize("sample_dataset_per_tree", [True, False]) def test_iris_pauc_statistic( criterion, honest_prior, estimator, permute_per_tree, sample_dataset_per_tree 
@@ -268,7 +274,7 @@ def test_iris_pauc_statistic( max_features = "sqrt" n_repeats = 200 n_estimators = 100 - test_size = 0.1 + test_size = 0.2 # Check consistency on dataset iris. clf = FeatureImportanceForestClassifier( @@ -293,6 +299,7 @@ def test_iris_pauc_statistic( if sample_dataset_per_tree and not permute_per_tree: # test in another test pytest.skip() + stat, pvalue = clf.test( X, iris_y, diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 43abe93fe..5b6c8f6a1 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -1,6 +1,8 @@ # Authors: Ronan Perry, Sambit Panda, Haoyin Xu # Adopted from: https://github.com/neurodata/honest-forests +import inspect + import numpy as np from sklearn.base import ClassifierMixin, MetaEstimatorMixin, _fit_context, clone from sklearn.ensemble._base import _set_random_states @@ -568,12 +570,22 @@ def _fit( class_weight=self.class_weight, random_state=self.random_state, min_impurity_decrease=self.min_impurity_decrease, - ccp_alpha=self.ccp_alpha, - monotonic_cst=self.monotonic_cst, - store_leaf_values=self.store_leaf_values, ) ) + # TODO: refactor oblique trees to have these parameters by default, but not used + init_signature = inspect.signature(self.estimator_.__init__) + if "ccp_alpha" in init_signature.parameters: + self.estimator_.set_params(**dict(ccp_alpha=self.ccp_alpha)) + if "store_leaf_values" in init_signature.parameters: + self.estimator_.set_params( + **dict( + store_leaf_values=self.store_leaf_values, + ) + ) + if "monotonic_cst" in init_signature.parameters: + self.self.estimator_.set_params(**dict(monotonic_cst=self.monotonic_cst)) + if self.random_state is not None: _set_random_states(self.estimator_, self.random_state) From c40e866363458e2e2fc8a5dde41669022edb78ae Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 19 Sep 2023 23:58:52 -0400 Subject: [PATCH 33/70] Fix bug Signed-off-by: Adam Li --- sktree/tree/_honest_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 5b6c8f6a1..8e0f2321d 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -584,7 +584,7 @@ def _fit( ) ) if "monotonic_cst" in init_signature.parameters: - self.self.estimator_.set_params(**dict(monotonic_cst=self.monotonic_cst)) + self.estimator_.set_params(**dict(monotonic_cst=self.monotonic_cst)) if self.random_state is not None: _set_random_states(self.estimator_, self.random_state) From a6b1a0c7c789c5ff95d0be8f3434322434cc5f48 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 20 Sep 2023 00:08:50 -0400 Subject: [PATCH 34/70] Fix example Signed-off-by: Adam Li --- examples/plot_MI_gigantic_hypothesis_testing_forest.py | 6 ++++-- pyproject.toml | 2 +- sktree/stats/forestht.py | 7 ------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index a8ed7fe1f..75078a20f 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -90,10 +90,11 @@ # computed as the proportion of samples in the null distribution that are less than the # observed test statistic. -n_estimators = 100 -max_features = 1.0 +n_estimators = 125 +max_features = "sqrt" test_size = 0.2 n_repeats = 500 +n_jobs = -1 est = FeatureImportanceForestClassifier( estimator=HonestForestClassifier( @@ -102,6 +103,7 @@ tree_estimator=DecisionTreeClassifier(), random_state=seed, honest_fraction=0.7, + n_jobs=n_jobs, ), random_state=seed, test_size=test_size, diff --git a/pyproject.toml b/pyproject.toml index 7bb840d16..d032d2c5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -166,7 +166,7 @@ _cythonlint = 'cython-lint sktree/ --exclude "sktree/_lib/*"' _black_check = 'black --check sktree examples' _isort_check = 'isort --check .' 
_pydocstyle = 'pydocstyle ./sktree' -_codespell = 'codespell sktree/ doc/ examples/ --ignore-words=.codespellignore --skip "**/_build/*,**/_lib/*,doc/_build/*,doc/auto_examples/*,doc/tutorials/*"' +_codespell = 'codespell sktree/ doc/ examples/ --ignore-words=.codespellignore --skip "**/_build/*,**/_lib/*,doc/_build/*,doc/auto_examples/*,doc/tutorials/*,doc/generated/*"' _changelog = 'semversioner changelog > CHANGELOG.md' _apply_version = 'semversioner release' diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 9f8016684..c8280ce41 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -313,8 +313,6 @@ def test( y_test = y[indices_test, :] else: y_test = y - print("y_test: ", y_test.shape) - print(observe_posteriors.shape, permute_posteriors.shape) metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, y_pred_proba_normal=observe_posteriors, @@ -680,7 +678,6 @@ def _statistic( # Ignore all NaN values (samples not tested) y_true_final = y[nonnan_indices, :] posterior_final = posterior_final[nonnan_indices, :] - print("Inside _statistic: ", y_true_final.shape, posterior_final.shape) else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] @@ -711,10 +708,6 @@ def _statistic( samples = indices_test y_true_final = y_test posterior_final = y_pred - - print("False false: ", posterior_final.shape, y_true_final.shape) - print(y_true_final) - print(posterior_final) if metric == "auc": # at this point, posterior_final is the predicted posterior for only the positive class # as more than one output is not supported. 
From 21f3b224652ca37e547cbb33f5d63aa6223c1bff Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 20 Sep 2023 00:30:18 -0400 Subject: [PATCH 35/70] Fix unit-test Signed-off-by: Adam Li --- sktree/tests/test_honest_forest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sktree/tests/test_honest_forest.py b/sktree/tests/test_honest_forest.py index 6f2e3daae..91fce420e 100644 --- a/sktree/tests/test_honest_forest.py +++ b/sktree/tests/test_honest_forest.py @@ -79,12 +79,14 @@ def test_iris(criterion, max_features, honest_prior, estimator): ], ) def test_iris_multi(criterion, max_features, honest_prior, estimator): + n_estimators = 10 + # Check consistency on dataset iris. clf = HonestForestClassifier( criterion=criterion, random_state=0, max_features=max_features, - n_estimators=10, + n_estimators=n_estimators, honest_prior=honest_prior, tree_estimator=estimator, ) @@ -101,7 +103,7 @@ def test_iris_multi(criterion, max_features, honest_prior, estimator): score = r2_score(clf.predict(X), y) if honest_prior == "ignore": assert ( - score > 0.6 and score < 1.0 + score > 0.4 and score < 1.0 ), "Failed with {0}, criterion = {1} and score = {2}".format( "HForest", criterion, score ) From c8af371ba75789703f96eb1d3e2c0025af8b0840 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 20 Sep 2023 09:44:19 -0400 Subject: [PATCH 36/70] Remove unnecessary doc string Signed-off-by: Adam Li --- sktree/ensemble/_eiforest.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sktree/ensemble/_eiforest.py b/sktree/ensemble/_eiforest.py index ba755ec27..c56b65c69 100644 --- a/sktree/ensemble/_eiforest.py +++ b/sktree/ensemble/_eiforest.py @@ -40,10 +40,6 @@ class ExtendedIsolationForest(IsolationForest): original paper. - If float, the contamination should be in the range (0, 0.5]. - .. versionchanged:: 0.22 - The default value of ``contamination`` changed from 0.1 - to ``'auto'``. 
- max_features : int or float, default=1.0 The number of features to draw from X to train each base estimator. From fd5a63c6af80e0d94518b2a4129c8a10d25d8552 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Wed, 20 Sep 2023 10:15:12 -0400 Subject: [PATCH 37/70] DOC correct result evaluation comment Co-authored-by: Adam Li --- examples/plot_MI_gigantic_hypothesis_testing_forest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index 75078a20f..8408f4e03 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -117,7 +117,7 @@ ) print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}") -# we test for the second feature set, which is important and thus should return a pvalue > 0.05 +# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05 stat, pvalue = est.test( X, y, From eee4cbc79b0cbf6cd94862737b1830210d7785be Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 20 Sep 2023 10:21:34 -0400 Subject: [PATCH 38/70] Try redirect Signed-off-by: Adam Li --- .github/workflows/circle_artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/circle_artifacts.yml b/.github/workflows/circle_artifacts.yml index 09a231ef8..971e00d15 100644 --- a/.github/workflows/circle_artifacts.yml +++ b/.github/workflows/circle_artifacts.yml @@ -17,7 +17,7 @@ jobs: uses: larsoner/circleci-artifacts-redirector-action@master with: repo-token: ${{ secrets.GITHUB_TOKEN }} - api-token: ${{ secrets.CIRCLE_TOKEN }} + api-token: ${{ secrets.CIRCLECI_TOKEN }} artifact-path: 0/dev/index.html circleci-jobs: build_docs job-title: Check the rendered docs here! 
From 527397d15ccc76af0c304321dd08c4198db06498 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 20 Sep 2023 11:05:37 -0400 Subject: [PATCH 39/70] Try again Signed-off-by: Adam Li --- .github/workflows/circle_artifacts.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/circle_artifacts.yml b/.github/workflows/circle_artifacts.yml index 971e00d15..40ec84b30 100644 --- a/.github/workflows/circle_artifacts.yml +++ b/.github/workflows/circle_artifacts.yml @@ -4,13 +4,14 @@ on: [status] # Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this # github actions workflow: # https://docs.github.com/en/actions/security-guides/automatic-token-authentication -permissions: - statuses: write +permissions: read-all jobs: circleci_artifacts_redirector_job: runs-on: ubuntu-20.04 if: "github.repository == 'neurodata/scikit-tree' && github.event.context == 'ci/circleci: build_docs'" + permissions: + statuses: write name: Run CircleCI artifacts redirector steps: - name: GitHub Action step From fe848a4af4963af6223e5158e561ef96325f1457 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 20 Sep 2023 11:07:56 -0400 Subject: [PATCH 40/70] Try again Signed-off-by: Adam Li --- sktree/stats/tests/test_forestht.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 18cc507e3..62ffd0a09 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -248,7 +248,7 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, assert pvalue > 0.05, f"pvalue: {pvalue}" -@flaky(max_runs=3) +@flaky(max_runs=2) @pytest.mark.parametrize("criterion", ["gini", "entropy"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform"]) @pytest.mark.parametrize( From 9a052359f34ac9cda43931b8a7b7f545e0051691 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 21 Sep 2023 12:02:25 -0400 Subject: [PATCH 
41/70] Improve the checking inputs of feature importance forests Signed-off-by: Adam Li --- sktree/stats/forestht.py | 74 +++++++++++++++++++++++++---- sktree/stats/tests/test_forestht.py | 45 ++++++++++++++++++ 2 files changed, 109 insertions(+), 10 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index c8280ce41..1cf054bd2 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -43,6 +43,12 @@ def __init__( self.permute_per_tree = permute_per_tree self.sample_dataset_per_tree = sample_dataset_per_tree + self.n_samples_test_ = None + self._n_samples_ = None + self._covariate_index_cache_ = None + self._type_of_target_ = None + self.n_features_in_ = None + @property def n_estimators(self): return self.estimator_.n_estimators @@ -107,6 +113,29 @@ def _statistic( ): raise NotImplementedError("Subclasses should implement this!") + def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None): + X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) + if y.ndim != 2: + y = y.reshape(-1, 1) + + if self._n_samples_ is not None and X.shape[0] != self._n_samples_: + raise RuntimeError( + f"X must have {self._n_samples_} samples, got {X.shape[0]}. " + f"If running on a new dataset, call the 'reset' method." + ) + if self.n_features_in_ is not None and X.shape[1] != self.n_features_in_: + raise RuntimeError( + f"X must have {self.n_features_in_} features, got {X.shape[1]}. " + f"If running on a new dataset, call the 'reset' method." + ) + if self._type_of_target_ is not None and type_of_target(y) != self._type_of_target_: + raise RuntimeError( + f"y must have type {self._type_of_target_}, got {type_of_target(y)}. " + f"If running on a new dataset, call the 'reset' method." + ) + + return X, y, covariate_index + def statistic( self, X: ArrayLike, @@ -151,9 +180,12 @@ def statistic( least one tree in the posterior computation. 
""" if check_input: - X, y = check_X_y(X, y, ensure_2d=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) + X, y, covariate_index = self._check_input(X, y, covariate_index) + + if self._n_samples_ is None: + self._n_samples_, self.n_features_in_ = X.shape + if self._type_of_target_ is None: + self._type_of_target_ = type_of_target(y) if self.sample_dataset_per_tree and not self.permute_per_tree: raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True") @@ -167,7 +199,7 @@ def statistic( # Infer type of target y if not hasattr(self, "_type_of_target"): - self._type_of_target = type_of_target(y) + self._type_of_target_ = type_of_target(y) # XXX: this can be improved as an extra fit can be avoided, by just doing error-checking # and then setting the internal meta data structures @@ -264,9 +296,7 @@ def test( pval : float The p-value of the test statistic. """ - X, y = check_X_y(X, y, ensure_2d=True, copy=True, multi_output=True) - if y.ndim != 2: - y = y.reshape(-1, 1) + X, y, covariate_index = self._check_input(X, y, covariate_index) if not hasattr(self, "samples_"): # first compute the test statistic on the un-permuted data @@ -411,6 +441,17 @@ class FeatureImportanceForestRegressor(BaseForestHT): null_dist_ : ArrayLike of shape (n_repeats,) The null distribution of the test statistic. + Notes + ----- + This class trains two forests: one on the original dataset, and one on the + permuted dataset. The forest from the original dataset is cached and re-used to + compute the test-statistic each time the :meth:`test` method is called. However, + the forest from the permuted dataset is re-trained each time the :meth:`test` is called + if the ``covariate_index`` differs from the previous run. + + To fully start from a new dataset, call the :meth:`reset` method, which will then + re-train both forests upon calling the :meth:`test` and :meth:`statistic` methods. + References ---------- .. 
footbibliography:: @@ -591,6 +632,17 @@ class FeatureImportanceForestClassifier(BaseForestHT): null_dist_ : ArrayLike of shape (n_repeats,) The null distribution of the test statistic. + Notes + ----- + This class trains two forests: one on the original dataset, and one on the + permuted dataset. The forest from the original dataset is cached and re-used to + compute the test-statistic each time the :meth:`test` method is called. However, + the forest from the permuted dataset is re-trained each time the :meth:`test` is called + if the ``covariate_index`` differs from the previous run. + + To fully start from a new dataset, call the :meth:`reset` method, which will then + re-train both forests upon calling the :meth:`test` and :meth:`statistic` methods. + References ---------- .. footbibliography:: @@ -630,7 +682,7 @@ def _statistic( X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, - metric="mse", + metric="mi", return_posteriors: bool = False, **metric_kwargs, ): @@ -696,6 +748,8 @@ def _statistic( ) X_train[:, covariate_index] = X_train[index_arr, covariate_index] + if self._type_of_target_ == "binary": + y_train = y_train.ravel() estimator.fit(X_train, y_train) if predict_posteriors: @@ -711,11 +765,11 @@ def _statistic( if metric == "auc": # at this point, posterior_final is the predicted posterior for only the positive class # as more than one output is not supported. - if self._type_of_target == "binary": + if self._type_of_target_ == "binary": posterior_final = posterior_final[:, 1] else: raise RuntimeError( - f"AUC metric is not supported for {self._type_of_target} targets." + f"AUC metric is not supported for {self._type_of_target_} targets." 
) if np.isnan(posterior_final).any(): diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 62ffd0a09..00b42832a 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -317,3 +317,48 @@ def test_iris_pauc_statistic( score = clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) assert score >= 0.8, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) + + +@pytest.mark.parametrize( + "forest_hyppo", + [ + FeatureImportanceForestRegressor( + estimator=RandomForestRegressor( + n_estimators=10, + ), + random_state=seed, + ), + FeatureImportanceForestClassifier( + estimator=RandomForestClassifier( + n_estimators=10, + ), + random_state=seed, + permute_per_tree=False, + sample_dataset_per_tree=False, + ), + ], +) +def test_forestht_check_inputs(forest_hyppo): + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + + # Test case 1: Valid input + forest_hyppo.statistic(X, y) + + # Test case 2: Invalid input with different number of samples + X_invalid = np.random.rand(n_samples + 1, X.shape[1]) + y_invalid = rng.integers(0, 2, size=n_samples + 1) + with pytest.raises(RuntimeError, match="X must have"): + forest_hyppo.statistic(X_invalid, y_invalid) + + # Test case 3: Invalid input with different number of features + X_invalid = np.random.rand(X.shape[0], n_features + 1) + with pytest.raises(RuntimeError, match="X must have"): + forest_hyppo.statistic(X_invalid, y) + + # Test case 4: Invalid input with incorrect y type target + y_invalid = np.random.rand(X.shape[0]) + with pytest.raises(RuntimeError, match="y must have type"): + forest_hyppo.statistic(X, y_invalid) From dddacb11279b5944c8a1b563f89a18d014e3dbba Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 21 Sep 2023 12:21:12 -0400 Subject: [PATCH 42/70] Fix unit test Signed-off-by: Adam Li --- 
.../compare_coleman_and_permutation_forest.ipynb} | 0 sktree/stats/forestht.py | 14 +++++++++----- sktree/stats/tests/test_forestht.py | 5 +++++ 3 files changed, 14 insertions(+), 5 deletions(-) rename benchmarks_nonasv/{test_permutation_forest.ipynb => notebooks/compare_coleman_and_permutation_forest.ipynb} (100%) diff --git a/benchmarks_nonasv/test_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb similarity index 100% rename from benchmarks_nonasv/test_permutation_forest.ipynb rename to benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 1cf054bd2..e4e4a9801 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -61,6 +61,12 @@ def reset(self): if attr_name.endswith("_") and attr_name not in class_attributes: delattr(self, attr_name) + self.n_samples_test_ = None + self._n_samples_ = None + self._covariate_index_cache_ = None + self._type_of_target_ = None + self.n_features_in_ = None + def _get_estimators_indices(self): indices = np.arange(self._n_samples_, dtype=int) @@ -221,13 +227,11 @@ def statistic( estimator.fit(X[:2], y[:2]) # permute per tree - n_samples = X.shape[0] - self._n_samples_ = n_samples if self.sample_dataset_per_tree: - self.n_samples_test_ = n_samples + self.n_samples_test_ = self._n_samples_ else: # not permute per tree - test_size_ = int(self.test_size * n_samples) + test_size_ = int(self.test_size * self._n_samples_) # Fit each tree and compute posteriors with train test splits self.n_samples_test_ = test_size_ @@ -298,7 +302,7 @@ def test( """ X, y, covariate_index = self._check_input(X, y, covariate_index) - if not hasattr(self, "samples_"): + if self._n_samples_ is None: # first compute the test statistic on the un-permuted data observe_stat, observe_posteriors, observe_samples = self.statistic( X, diff --git a/sktree/stats/tests/test_forestht.py 
b/sktree/stats/tests/test_forestht.py index 00b42832a..54b3f3e2c 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -315,6 +315,11 @@ def test_iris_pauc_statistic( print(pvalue) assert pvalue < 0.05, f"pvalue: {pvalue}" + # one must call `reset()` to make sure the test is run on a "new" feature set properly + with pytest.raises(RuntimeError, match="X must have 8 features"): + clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) + + clf.reset() score = clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) assert score >= 0.8, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) From 33c9a74020446cf7253a869ac88c14913b7ce2c2 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 21 Sep 2023 12:51:53 -0400 Subject: [PATCH 43/70] Try again Signed-off-by: Adam Li --- doc/conf.py | 1 + sktree/stats/forestht.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index fbab7cddd..da50fd4bd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -216,6 +216,7 @@ "Tree", "_type_", "MetadataRequest", + "sklearn.utils.metadata_routing.MetadataRequest", "~utils.metadata_routing.MetadataRequest", "quantiles", "n_quantiles", diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index e4e4a9801..fd12873d3 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -453,7 +453,7 @@ class FeatureImportanceForestRegressor(BaseForestHT): the forest from the permuted dataset is re-trained each time the :meth:`test` is called if the ``covariate_index`` differs from the previous run. - To fully start from a new dataset, call the :meth:`reset` method, which will then + To fully start from a new dataset, call the ``reset`` method, which will then re-train both forests upon calling the :meth:`test` and :meth:`statistic` methods. 
References @@ -644,7 +644,7 @@ class FeatureImportanceForestClassifier(BaseForestHT): the forest from the permuted dataset is re-trained each time the :meth:`test` is called if the ``covariate_index`` differs from the previous run. - To fully start from a new dataset, call the :meth:`reset` method, which will then + To fully start from a new dataset, call the ``reset`` method, which will then re-train both forests upon calling the :meth:`test` and :meth:`statistic` methods. References From 64f20175f79e449794f61757317866c4c2cb3e93 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 21 Sep 2023 13:17:03 -0400 Subject: [PATCH 44/70] Fix docs Signed-off-by: Adam Li --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index da50fd4bd..fa949235e 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -88,6 +88,7 @@ nitpick_ignore = [ ("py:mod", "sktree.tree"), ("py:mod", "sktree.stats"), + ("py:class", "sklearn.utils.metadata_routing.MetadataRequest"), ] # The name of a reST role (builtin or Sphinx extension) to use as the default From b8dc3a22ca3b6f9285101ee20dce231a04b2434d Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 28 Sep 2023 11:07:06 -0400 Subject: [PATCH 45/70] Fix pvalue sampling Signed-off-by: Adam Li --- sktree/stats/forestht.py | 204 ++++++++++++++++++---------- sktree/stats/tests/test_forestht.py | 4 +- sktree/stats/utils.py | 45 +++--- 3 files changed, 162 insertions(+), 91 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index fd12873d3..750d913f9 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1,3 +1,5 @@ +from typing import Callable, Dict + import numpy as np from numpy.typing import ArrayLike from sklearn.base import MetaEstimatorMixin, clone, is_classifier @@ -8,7 +10,6 @@ from sklearn.utils.validation import _is_fitted, check_X_y from sktree._lib.sklearn.ensemble._forest import ( - BaseForest, ForestClassifier, ForestRegressor, RandomForestClassifier, @@ -109,12 
+110,12 @@ def train_test_samples_(self): def _statistic( self, - estimator: BaseForest, + estimator: ForestClassifier, X: ArrayLike, y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mse", - return_posteriors: bool = False, + covariate_index: ArrayLike, + metric: str, + return_posteriors: bool, **metric_kwargs, ): raise NotImplementedError("Subclasses should implement this!") @@ -175,7 +176,8 @@ def statistic( ------- stat : float The test statistic. - posterior_final : ArrayLike of shape (n_samples_final, n_outputs), optional + posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or + (n_estimators, n_samples_final), optional If ``return_posteriors`` is True, then the posterior probabilities of the samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` if all samples are encountered in the test set of at least one tree in the @@ -332,6 +334,7 @@ def test( # Note: at this point, both `estimator` and `permuted_estimator_` should # have been fitted already, so we can now compute on the null by resampling # the posteriors and computing the test statistic on the resampled posteriors + print(observe_posteriors.shape, permute_posteriors.shape) if self.sample_dataset_per_tree: metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y[observe_samples, :], @@ -342,11 +345,11 @@ def test( seed=self.random_state, ) else: - if not self.sample_dataset_per_tree: - _, indices_test = self.train_test_samples_[0] - y_test = y[indices_test, :] - else: - y_test = y + # If not sampling a new dataset per tree, then we may either be + # permuting the covariate index per tree or per forest. 
If not permuting + # there is only one train and test split, so we can just use that + _, indices_test = self.train_test_samples_[0] + y_test = y[indices_test, :] metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, y_pred_proba_normal=observe_posteriors, @@ -439,7 +442,7 @@ class FeatureImportanceForestRegressor(BaseForestHT): y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. - posterior_final_ : ArrayLike of shape (n_samples_final,) + posterior_final_ : ArrayLike of shape (n_estimators, n_samples_final) The predicted posterior probabilities of the samples used in the final test. null_dist_ : ArrayLike of shape (n_repeats,) @@ -490,16 +493,16 @@ def _get_estimator(self): def _statistic( self, - estimator: ForestRegressor, + estimator: ForestClassifier, X: ArrayLike, y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mse", - return_posteriors: bool = False, + covariate_index: ArrayLike, + metric: str, + return_posteriors: bool, **metric_kwargs, ): """Helper function to compute the test statistic.""" - metric_func = METRIC_FUNCTIONS[metric] + metric_func: Callable[[ArrayLike, ArrayLike, Dict], float] = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) if self.permute_per_tree: @@ -516,18 +519,17 @@ def _statistic( # Fill test set posteriors & set rest NaN posterior_arr[idx, indices_test, :] = y_pred # posterior - y_true_final = y[indices_test, :] - + # determine if there are any nans in the final posterior array # Average all posteriors (n_samples_test, n_outputs) - posterior_final = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr, axis=0) - # Find the row indices with NaN values in any column - nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] + # # Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_forest).any(axis=1))[0] samples = nonnan_indices - # 
Ignore all NaN values (samples not tested) + # # Ignore all NaN values (samples not tested) y_true_final = y[nonnan_indices, :] - posterior_final = posterior_final[nonnan_indices, :] + posterior_arr = posterior_arr[:, (nonnan_indices), :] else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] @@ -547,24 +549,31 @@ def _statistic( X_train[:, covariate_index] = X_train[index_arr, covariate_index] estimator.fit(X_train, y_train) - y_pred = estimator.predict(X_test) + + # construct posterior array for all trees (n_trees, n_samples_test, n_outputs) + posterior_arr = np.full( + (len(estimator.estimators_), self.n_samples_test_, estimator.n_outputs_), np.nan + ) + for itree, tree in enumerate(estimator.estimators_): + posterior_arr[itree, ...] = tree.predict(X_test) # set variables to compute metric samples = indices_test y_true_final = y_test - posterior_final = y_pred - stat = metric_func(y_true_final, posterior_final, **metric_kwargs) + # Average all posteriors (n_samples_test, n_outputs) to compute the statistic + posterior_forest = np.nanmean(posterior_arr, axis=0) + stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) # arrays of y and predicted posterior self.samples_ = samples self.y_true_final_ = y_true_final - self.posterior_final_ = posterior_final + self.posterior_final_ = posterior_arr self.stat_ = stat if return_posteriors: - return stat, posterior_final, samples + return stat, posterior_arr, samples return stat @@ -625,12 +634,13 @@ class FeatureImportanceForestClassifier(BaseForestHT): samples_ : ArrayLike of shape (n_samples_final,) The indices of the samples used in the final test set that would slice - the original ``(X, y)`` input. + the original ``(X, y)`` input along the rows. 
y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. - posterior_final_ : ArrayLike of shape (n_samples_final,) + posterior_final_ : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or + (n_estimators, n_samples_final) The predicted posterior probabilities of the samples used in the final test. null_dist_ : ArrayLike of shape (n_repeats,) @@ -659,7 +669,7 @@ def __init__( verbose=0, test_size=0.2, permute_per_tree=True, - sample_dataset_per_tree=True, + sample_dataset_per_tree=False, ): super().__init__( estimator=estimator, @@ -685,13 +695,13 @@ def _statistic( estimator: ForestClassifier, X: ArrayLike, y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mi", - return_posteriors: bool = False, + covariate_index: ArrayLike, + metric: str, + return_posteriors: bool, **metric_kwargs, ): """Helper function to compute the test statistic.""" - metric_func = METRIC_FUNCTIONS[metric] + metric_func: Callable[[ArrayLike, ArrayLike, Dict], float] = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) if metric in POSTERIOR_FUNCTIONS: @@ -699,41 +709,30 @@ def _statistic( else: predict_posteriors = False + if predict_posteriors: + # now initialize posterior array as (n_trees, n_samples_test, n_classes) + posterior_arr = np.full( + (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan + ) + else: + # now initialize posterior array as (n_trees, n_samples_test, n_outputs) + posterior_arr = np.full( + (self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan + ) if self.permute_per_tree: - if predict_posteriors: - posterior_arr = np.full( - (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan - ) - else: - # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - posterior_arr = np.full( - (self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan - ) - for idx, (indices_train, indices_test) in 
enumerate(self._get_estimators_indices()): tree: DecisionTreeClassifier = estimator.estimators_[idx] train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) if predict_posteriors: # XXX: currently assumes n_outputs_ == 1 - y_pred = tree.predict_proba(X[indices_test, :]) + y_pred = tree.predict_proba(X[indices_test, :]).reshape(-1, tree.n_classes_) else: y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) # Fill test set posteriors & set rest NaN # TODO: refactor so posterior_arr is just a large NaN array posterior_arr[idx, indices_test, :] = y_pred # posterior - - # Average all posteriors (n_samples_test, n_outputs) - posterior_final = np.nanmean(posterior_arr, axis=0) - - # Find the row indices with NaN values in any column - nonnan_indices = np.where(~np.isnan(posterior_final).any(axis=1))[0] - samples = nonnan_indices - - # Ignore all NaN values (samples not tested) - y_true_final = y[nonnan_indices, :] - posterior_final = posterior_final[nonnan_indices, :] else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] @@ -756,39 +755,104 @@ def _statistic( y_train = y_train.ravel() estimator.fit(X_train, y_train) - if predict_posteriors: - # XXX: currently assumes n_outputs_ == 1 - y_pred = estimator.predict_proba(X_test) - else: - y_pred = estimator.predict(X_test) + # construct posterior array for all trees (n_trees, n_samples_test, n_outputs) + for itree, tree in enumerate(estimator.estimators_): + if predict_posteriors: + # XXX: currently assumes n_outputs_ == 1 + posterior_arr[itree, indices_test, ...] = tree.predict_proba(X_test).reshape( + -1, tree.n_classes_ + ) + else: + posterior_arr[itree, indices_test, ...] 
= tree.predict(X_test).reshape( + -1, tree.n_outputs_ + ) # set variables to compute metric samples = indices_test y_true_final = y_test - posterior_final = y_pred if metric == "auc": # at this point, posterior_final is the predicted posterior for only the positive class # as more than one output is not supported. if self._type_of_target_ == "binary": - posterior_final = posterior_final[:, 1] + posterior_arr = posterior_arr[..., (1,)] else: raise RuntimeError( f"AUC metric is not supported for {self._type_of_target_} targets." ) - if np.isnan(posterior_final).any(): - raise RuntimeError("NaN values encountered in posterior_final.") + # determine if there are any nans in the final posterior array + # Average all posteriors (n_samples_test, n_outputs) + posterior_forest = np.nanmean(posterior_arr, axis=0) + + # # Find the row indices with NaN values in any column + nonnan_indices = np.where(~np.isnan(posterior_forest).any(axis=1))[0] + samples = nonnan_indices + + # Ignore all NaN values (samples not tested) + y_true_final = y[(nonnan_indices), :] + posterior_arr = posterior_arr[:, (nonnan_indices), :] + + # Average all posteriors (n_samples_test, n_outputs) to compute the statistic + posterior_forest = np.nanmean(posterior_arr, axis=0) + stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) - stat = metric_func(y_true_final, posterior_final, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) # arrays of y and predicted posterior self.samples_ = samples self.y_true_final_ = y_true_final - self.posterior_final_ = posterior_final + self.posterior_final_ = posterior_arr self.stat_ = stat if return_posteriors: - return stat, posterior_final, samples + return stat, posterior_arr, samples return stat + + def statistic( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mi", + return_posteriors: bool = False, + check_input: bool = True, + **metric_kwargs, 
+ ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "mi", which computes Mutual Information. + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + check_input : bool, optional + Whether or not to check the input, by default True. + **metric_kwargs : dict, optional + Additional keyword arguments to pass to the metric function. + + Returns + ------- + stat : float + The test statistic. + posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or + (n_estimators, n_samples_final), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. + samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. ``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. 
+ """ + return super().statistic( + X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs + ) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 54b3f3e2c..ba4050591 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -35,7 +35,7 @@ def test_featureimportance_forest_permute_pertree(): n_estimators=10, ), permute_per_tree=True, - sample_dataset_per_tree=True, + sample_dataset_per_tree=False, ) est.statistic(iris_X[:10], iris_y[:10]) @@ -323,6 +323,8 @@ def test_iris_pauc_statistic( score = clf.statistic(iris_X, iris_y, metric="auc", max_fpr=limit) assert score >= 0.8, "Failed with pAUC: {0} for max fpr: {1}".format(score, limit) + assert isinstance(clf.estimator_, HonestForestClassifier) + @pytest.mark.parametrize( "forest_hyppo", diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 9d06dc8fd..766fd30d9 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -169,14 +169,16 @@ def _compute_null_distribution_coleman( Parameters ---------- - X_test : ArrayLike of shape (n_samples, n_features) - The data matrix. y_test : ArrayLike of shape (n_samples, n_outputs) The output matrix. - y_pred_proba_normal : ArrayLike of shape (n_samples_normal, n_outputs) - The predicted posteriors from the normal forest. - y_pred_proba_perm : ArrayLike of shape (n_samples_perm, n_outputs) - The predicted posteriors from the permuted forest. + y_pred_proba_normal : ArrayLike of shape (n_estimators, n_samples_normal, n_outputs) + The predicted posteriors from the normal forest. Some of the trees + may have nans predicted in them, which means the tree used these samples + for training and not for prediction. + y_pred_proba_perm : ArrayLike of shape (n_estimators, n_samples_perm, n_outputs) + The predicted posteriors from the permuted forest. 
Some of the trees + may have nans predicted in them, which means the tree used these samples + for training and not for prediction. normal_samples : ArrayLike of shape (n_samples_normal,) The indices of the normal samples that we have a posterior for. perm_samples : ArrayLike of shape (n_samples_perm,) @@ -199,10 +201,16 @@ def _compute_null_distribution_coleman( metric_func = METRIC_FUNCTIONS[metric] # sample two sets of equal number of trees from the combined forest these are the posteriors + # (n_estimators * 2, n_samples, n_outputs) all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0) + print(y_pred_proba_normal.shape) + n_estimators, _, _ = y_pred_proba_normal.shape n_samples_test = len(y_test) - if len(all_y_pred) != 2 * n_samples_test: + if all_y_pred.shape[1] != n_samples_test: + print(all_y_pred.shape) + print(n_samples_test) + print(y_test.shape) print("y_pred_proba_perm: ", y_pred_proba_perm.shape) print("y_pred_proba: ", y_pred_proba_normal.shape) @@ -212,12 +220,13 @@ def _compute_null_distribution_coleman( ) # create two stacked index arrays of y_test resulting in [1, ..., N, 1, ..., N] - y_test_ind_arr = np.hstack( - (np.arange(n_samples_test, dtype=int), np.arange(n_samples_test, dtype=int)) - ) + # where N is `n_estimators` + # y_test_ind_arr = np.hstack( + # (np.arange(n_estimators, dtype=int), np.arange(n_estimators, dtype=int)) + # ) # create index array of [1, ..., 2N] to slice into `all_y_pred` - y_pred_ind_arr = np.arange((2 * n_samples_test), dtype=int) + y_pred_ind_arr = np.arange((2 * n_estimators), dtype=int) metric_star = np.zeros((n_repeats,)) metric_star_pi = np.zeros((n_repeats,)) @@ -228,17 +237,13 @@ def _compute_null_distribution_coleman( first_forest_inds = y_pred_ind_arr[:n_samples_test] second_forest_inds = y_pred_ind_arr[:n_samples_test] - # index into y_test for first half and second half - first_half_index_test = y_test_ind_arr[first_forest_inds] - second_half_index_test = 
y_test_ind_arr[second_forest_inds] - - # now get the pointers to the actual samples used for the metric - y_test_first_half = y_test[first_half_index_test] - y_test_second_half = y_test[second_half_index_test] + # get random half of the posteriors + y_pred_first_half = np.nanmean(all_y_pred[first_forest_inds], axis=0) + y_pred_second_half = np.nanmean(all_y_pred[second_forest_inds], axis=0) # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_test_first_half, all_y_pred[first_forest_inds]) - second_half_metric = metric_func(y_test_second_half, all_y_pred[second_forest_inds]) + first_half_metric = metric_func(y_test, y_pred_first_half) + second_half_metric = metric_func(y_test, y_pred_second_half) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric From 028f17d65f62d6dddcd3b99f073f44a72877b743 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 28 Sep 2023 14:50:23 -0400 Subject: [PATCH 46/70] Fixed pvalue issue Signed-off-by: Adam Li --- ...mpare_coleman_and_permutation_forest.ipynb | 772 +++++++++++++----- sktree/stats/forestht.py | 34 +- sktree/stats/tests/test_forestht.py | 28 +- sktree/stats/utils.py | 24 +- 4 files changed, 614 insertions(+), 244 deletions(-) diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index e2dc525b7..7c2febb11 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true @@ -249,186 +249,546 @@ "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n", - "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 
0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", - "X2/7: 0.001996007984031936\n", - "X1: 1.0\n", - "X6: 1.0\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 1.0\n", - "X6: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X1: 1.0\n", - "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X1: 1.0\n", - "X6: 1.0\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", 
+ "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", - "X1: 1.0\n", - "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 
0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", - "X1: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 
1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 
2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 
1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 
1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 
2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", - "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", + "X2/7: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X1: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X6: 0.001996007984031936\n", + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 2200, 1)\n", "X2/7: 1.0\n", - "X2/7: 1.0\n" + "(125, 2200, 1) (125, 2200, 1)\n", + "(125, 
2200, 1)\n", + "X2/7: 0.001996007984031936\n" ] } ], @@ -452,13 +812,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "9e60fac2-3b20-493e-886a-892d572a28c6", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -588,7 +948,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "36c53ff3-984d-4428-87c1-3421098e0081", "metadata": {}, "outputs": [ @@ -608,7 +968,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", "metadata": { "scrolled": true @@ -618,140 +978,251 @@ "name": "stdout", "output_type": "stream", "text": [ + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", - "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", + "(125, 600, 1) (125, 600, 
1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", - "X500: 1.0\n", - "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 1.0\n", - "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", - "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", - "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", 
"X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", - "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", - "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", - "X1: 
0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 0.004975124378109453\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", - "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 
600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", - "X500: 1.0\n", - "X2: 0.004975124378109453\n", - "X1: 1.0\n", - "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n" ] } @@ -761,7 +1232,7 @@ "rng = np.random.default_rng(seed)\n", "\n", "beta_space = np.hstack((np.linspace(0.01, 2.5, 8), np.linspace(5, 20, 7)))\n", - "beta_space = j_space.copy()\n", + "# beta_space = j_space.copy()\n", "for beta in beta_space:\n", " for idx in range(5):\n", " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", @@ -776,129 +1247,30 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "done\n" - ] - } - ], + "outputs": [], "source": [ "print(\"done\")" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
X2X1X500sigma_factor
00.0049750.0049751.0000000.005
10.0049750.0049751.0000000.005
21.0000001.0000001.0000000.005
31.0000000.0049751.0000000.005
41.0000001.0000000.0049750.005
\n", - "
" - ], - "text/plain": [ - " X2 X1 X500 sigma_factor\n", - "0 0.004975 0.004975 1.000000 0.005\n", - "1 0.004975 0.004975 1.000000 0.005\n", - "2 1.000000 1.000000 1.000000 0.005\n", - "3 1.000000 0.004975 1.000000 0.005\n", - "4 1.000000 1.000000 0.004975 0.005" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "display(df.head())" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 750d913f9..97d9793a6 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1,4 +1,4 @@ -from typing import Callable, Dict +from typing import Callable import numpy as np from numpy.typing import ArrayLike @@ -46,6 +46,7 @@ def __init__( self.n_samples_test_ = None self._n_samples_ = None + self._metric = None self._covariate_index_cache_ = None self._type_of_target_ = None self.n_features_in_ = None @@ -66,6 +67,7 @@ def reset(self): self._n_samples_ = None self._covariate_index_cache_ = None self._type_of_target_ = None + self._metric = None self.n_features_in_ = None def _get_estimators_indices(self): @@ -238,6 +240,13 @@ def statistic( # Fit each tree and compute posteriors with train test splits self.n_samples_test_ = test_size_ + if self._metric is not None and self._metric != metric: + raise RuntimeError( + f"Metric must be {self._metric}, got {metric}. " + f"If running on a new dataset, call the 'reset' method." 
+ ) + self._metric = metric + if not is_classifier(self.estimator_) and metric not in REGRESSOR_METRICS: raise RuntimeError( f'Metric must be either "mse" or "mae" if using Regression, got {metric}' @@ -334,7 +343,6 @@ def test( # Note: at this point, both `estimator` and `permuted_estimator_` should # have been fitted already, so we can now compute on the null by resampling # the posteriors and computing the test statistic on the resampled posteriors - print(observe_posteriors.shape, permute_posteriors.shape) if self.sample_dataset_per_tree: metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y[observe_samples, :], @@ -502,14 +510,14 @@ def _statistic( **metric_kwargs, ): """Helper function to compute the test statistic.""" - metric_func: Callable[[ArrayLike, ArrayLike, Dict], float] = METRIC_FUNCTIONS[metric] + metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) + posterior_arr = np.full( + (self.n_estimators, self.n_samples_test_, estimator.n_outputs_), np.nan + ) if self.permute_per_tree: # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - posterior_arr = np.zeros( - (self.n_estimators, self.n_samples_test_, estimator.n_outputs_) - ) for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): tree: DecisionTreeRegressor = estimator.estimators_[idx] train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) @@ -551,9 +559,6 @@ def _statistic( estimator.fit(X_train, y_train) # construct posterior array for all trees (n_trees, n_samples_test, n_outputs) - posterior_arr = np.full( - (len(estimator.estimators_), self.n_samples_test_, estimator.n_outputs_), np.nan - ) for itree, tree in enumerate(estimator.estimators_): posterior_arr[itree, ...] 
= tree.predict(X_test) @@ -669,7 +674,7 @@ def __init__( verbose=0, test_size=0.2, permute_per_tree=True, - sample_dataset_per_tree=False, + sample_dataset_per_tree=True, ): super().__init__( estimator=estimator, @@ -701,7 +706,7 @@ def _statistic( **metric_kwargs, ): """Helper function to compute the test statistic.""" - metric_func: Callable[[ArrayLike, ArrayLike, Dict], float] = METRIC_FUNCTIONS[metric] + metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) if metric in POSTERIOR_FUNCTIONS: @@ -731,7 +736,6 @@ def _statistic( y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) # Fill test set posteriors & set rest NaN - # TODO: refactor so posterior_arr is just a large NaN array posterior_arr[idx, indices_test, :] = y_pred # posterior else: # fitting a forest will only get one unique train/test split @@ -782,11 +786,13 @@ def _statistic( # determine if there are any nans in the final posterior array # Average all posteriors (n_samples_test, n_outputs) - posterior_forest = np.nanmean(posterior_arr, axis=0) + # posterior_forest = np.nanmean(posterior_arr, axis=0) # # Find the row indices with NaN values in any column - nonnan_indices = np.where(~np.isnan(posterior_forest).any(axis=1))[0] + # nonnan_indices = np.where(~np.isnan(posterior_forest).any(axis=1))[0] + nonnan_indices = np.all(~np.isnan(posterior_arr), axis=(0, 2)) samples = nonnan_indices + print(nonnan_indices) # Ignore all NaN values (samples not tested) y_true_final = y[(nonnan_indices), :] diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index ba4050591..05aaae129 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -29,26 +29,34 @@ iris_y = iris_y[p] -def test_featureimportance_forest_permute_pertree(): +@pytest.mark.parametrize("sample_dataset_per_tree", [True, False]) +def 
test_featureimportance_forest_permute_pertree(sample_dataset_per_tree): est = FeatureImportanceForestClassifier( estimator=RandomForestClassifier( n_estimators=10, + random_state=seed, ), permute_per_tree=True, - sample_dataset_per_tree=False, + test_size=0.7, + random_state=seed, + sample_dataset_per_tree=sample_dataset_per_tree, ) - est.statistic(iris_X[:10], iris_y[:10]) + n_samples = 50 + est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mse") assert ( - len(est.train_test_samples_[0][1]) == 10 * est.test_size - ), f"{len(est.train_test_samples_[0][1])} {10 * est.test_size}" - assert len(est.train_test_samples_[0][0]) == est._n_samples_ - 10 * est.test_size + len(est.train_test_samples_[0][1]) == n_samples * est.test_size + ), f"{len(est.train_test_samples_[0][1])} {n_samples * est.test_size}" + assert len(est.train_test_samples_[0][0]) == est._n_samples_ - n_samples * est.test_size - est.test(iris_X[:10], iris_y[:10], [0, 1], n_repeats=10, metric="mse") + est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mse") assert ( - len(est.train_test_samples_[0][1]) == 10 * est.test_size - ), f"{len(est.train_test_samples_[0][1])} {10 * est.test_size}" - assert len(est.train_test_samples_[0][0]) == est._n_samples_ - 10 * est.test_size + len(est.train_test_samples_[0][1]) == n_samples * est.test_size + ), f"{len(est.train_test_samples_[0][1])} {n_samples * est.test_size}" + assert len(est.train_test_samples_[0][0]) == est._n_samples_ - n_samples * est.test_size + + with pytest.raises(RuntimeError, match="Metric must be"): + est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi") def test_featureimportance_forest_errors(): diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 766fd30d9..30ce83ff8 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -171,18 +171,14 @@ def _compute_null_distribution_coleman( ---------- y_test : ArrayLike of shape (n_samples, n_outputs) The output matrix. 
- y_pred_proba_normal : ArrayLike of shape (n_estimators, n_samples_normal, n_outputs) + y_pred_proba_normal : ArrayLike of shape (n_estimators_normal, n_samples, n_outputs) The predicted posteriors from the normal forest. Some of the trees may have nans predicted in them, which means the tree used these samples for training and not for prediction. - y_pred_proba_perm : ArrayLike of shape (n_estimators, n_samples_perm, n_outputs) + y_pred_proba_perm : ArrayLike of shape (n_estimators_perm, n_samples, n_outputs) The predicted posteriors from the permuted forest. Some of the trees may have nans predicted in them, which means the tree used these samples for training and not for prediction. - normal_samples : ArrayLike of shape (n_samples_normal,) - The indices of the normal samples that we have a posterior for. - perm_samples : ArrayLike of shape (n_samples_perm,) - The indices of the permuted samples that we have a posterior for. metric : str, optional The metric, which to compute the null distribution of statistics, by default 'mse'. 
n_repeats : int, optional @@ -204,28 +200,15 @@ def _compute_null_distribution_coleman( # (n_estimators * 2, n_samples, n_outputs) all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0) - print(y_pred_proba_normal.shape) n_estimators, _, _ = y_pred_proba_normal.shape n_samples_test = len(y_test) if all_y_pred.shape[1] != n_samples_test: - print(all_y_pred.shape) - print(n_samples_test) - print(y_test.shape) - print("y_pred_proba_perm: ", y_pred_proba_perm.shape) - print("y_pred_proba: ", y_pred_proba_normal.shape) - raise RuntimeError( f"The number of samples in `all_y_pred` {len(all_y_pred)} " f"is not equal to 2 * n_samples_test {2 * n_samples_test}" ) - # create two stacked index arrays of y_test resulting in [1, ..., N, 1, ..., N] - # where N is `n_estimators` - # y_test_ind_arr = np.hstack( - # (np.arange(n_estimators, dtype=int), np.arange(n_estimators, dtype=int)) - # ) - - # create index array of [1, ..., 2N] to slice into `all_y_pred` + # create index array of [1, ..., 2N] to slice into `all_y_pred` the stacks of trees y_pred_ind_arr = np.arange((2 * n_estimators), dtype=int) metric_star = np.zeros((n_repeats,)) @@ -234,6 +217,7 @@ def _compute_null_distribution_coleman( # two sets of random indices from 1 : 2N are sampled using Fisher-Yates rng.shuffle(y_pred_ind_arr) + # get random half of the posteriors from two sets of trees first_forest_inds = y_pred_ind_arr[:n_samples_test] second_forest_inds = y_pred_ind_arr[:n_samples_test] From 2f06e76b5181314bf99cbf83f42138812576e395 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 28 Sep 2023 14:51:41 -0400 Subject: [PATCH 47/70] Cleanup Signed-off-by: Adam Li --- .../compare_coleman_and_permutation_forest.ipynb | 12 ++++++++++++ sktree/stats/forestht.py | 7 +------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 
7c2febb11..482f63bc6 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -1223,6 +1223,18 @@ "(125, 600, 1) (125, 600, 1)\n", "X1: 0.004975124378109453\n", "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", "X500: 0.004975124378109453\n" ] } diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 97d9793a6..c4217201f 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -785,14 +785,9 @@ def _statistic( ) # determine if there are any nans in the final posterior array - # Average all posteriors (n_samples_test, n_outputs) - # posterior_forest = np.nanmean(posterior_arr, axis=0) - - # # Find the row indices with NaN values in any column - # nonnan_indices = np.where(~np.isnan(posterior_forest).any(axis=1))[0] + # Find the row indices with NaN values in any column nonnan_indices = np.all(~np.isnan(posterior_arr), axis=(0, 2)) samples = nonnan_indices - print(nonnan_indices) # Ignore all NaN values (samples not tested) y_true_final = y[(nonnan_indices), :] From dc6dd29956d957f8a1a7304f1c8587b7ae96d7ba Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 28 Sep 2023 15:09:01 -0400 Subject: [PATCH 48/70] Fix docs biuld Signed-off-by: Adam Li --- ...mpare_coleman_and_permutation_forest.ipynb | 112 +++++++++++++++++- sktree/stats/forestht.py | 6 +- 2 files changed, 114 insertions(+), 4 deletions(-) diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 482f63bc6..e44fefd69 
100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -1235,7 +1235,117 @@ "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", "(125, 600, 1) (125, 600, 1)\n", - "X500: 0.004975124378109453\n" + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + 
"X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n" ] } ], diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index c4217201f..83b041c9a 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -179,7 +179,7 @@ def statistic( stat : float The test statistic. 
posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final), optional + (n_estimators, n_samples_final), optional If ``return_posteriors`` is True, then the posterior probabilities of the samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` if all samples are encountered in the test set of at least one tree in the @@ -645,7 +645,7 @@ class FeatureImportanceForestClassifier(BaseForestHT): The true labels of the samples used in the final test. posterior_final_ : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final) + (n_estimators, n_samples_final) The predicted posterior probabilities of the samples used in the final test. null_dist_ : ArrayLike of shape (n_repeats,) @@ -844,7 +844,7 @@ def statistic( stat : float The test statistic. posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final), optional + (n_estimators, n_samples_final), optional If ``return_posteriors`` is True, then the posterior probabilities of the samples used in the final test. 
``n_samples_final`` is equal to ``n_samples`` if all samples are encountered in the test set of at least one tree in the From dbf079e0cfea0b6eb3cc735992e9d89b15e91ea7 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 28 Sep 2023 15:16:06 -0400 Subject: [PATCH 49/70] Fix unit-test Signed-off-by: Adam Li --- ...mpare_coleman_and_permutation_forest.ipynb | 36 ++++++++++++++++++- sktree/stats/forestht.py | 4 ++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index e44fefd69..24475912c 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -1345,7 +1345,41 @@ "(125, 600, 1) (125, 600, 1)\n", "X500: 1.0\n", "(125, 600, 1) (125, 600, 1)\n", - "X2: 0.004975124378109453\n" + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n" ] } ], diff --git 
a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 83b041c9a..f0f41dc9a 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -785,8 +785,10 @@ def _statistic( ) # determine if there are any nans in the final posterior array + temp_posterior_forest = np.nanmean(posterior_arr, axis=0) + nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] + # Find the row indices with NaN values in any column - nonnan_indices = np.all(~np.isnan(posterior_arr), axis=(0, 2)) samples = nonnan_indices # Ignore all NaN values (samples not tested) From 1c0f66bbe2326e05ee3948d90ea6930f53199354 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 29 Sep 2023 11:35:46 -0400 Subject: [PATCH 50/70] Add update reshapes Signed-off-by: Adam Li --- ...mpare_coleman_and_permutation_forest.ipynb | 3520 ++++++++++++++--- sktree/stats/forestht.py | 33 +- 2 files changed, 2977 insertions(+), 576 deletions(-) diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 24475912c..8b5c34f46 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -38,12 +38,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], "source": [ - "def linear_model_ancova(sigma_factor=2.0, seed=None):\n", + "def linear_model_ancova(\n", + " sigma_factor=2.0, seed=None, permute_per_tree=True, sample_dataset_per_tree=True\n", + "):\n", " r\"\"\"Test MIGHT using MSE from linear model simulation.\n", "\n", " See https://arxiv.org/pdf/1904.07830.pdf Figure 1.\n", @@ -57,8 +59,6 @@ " test_size = 0.1\n", " n_repeats = 500\n", " metric = \"mse\"\n", - " permute_per_tree = True\n", - " sample_dataset_per_tree = True\n", "\n", " rng = np.random.default_rng(seed)\n", "\n", @@ 
-239,556 +239,2800 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of 
empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = 
y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X6: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 
1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 
1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 
1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 
0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 
0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 
0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 
1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X1: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X6: 0.001996007984031936\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 1.0\n", - "(125, 2200, 1) (125, 2200, 1)\n", - "(125, 2200, 1)\n", - "X2/7: 0.001996007984031936\n" + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", 
+ "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = 
y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = 
y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n", + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X1: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X6: 0.001996007984031936\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", + " y_train = y_train.ravel()\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X2/7: 1.0\n" ] } ], @@ -802,7 +3046,12 @@ " for idx in range(5):\n", " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", "\n", - " elements_dict = linear_model_ancova(sigma_factor, new_seed)\n", + " 
elements_dict = linear_model_ancova(\n", + " sigma_factor,\n", + " new_seed,\n", + " permute_per_tree=True,\n", + " sample_dataset_per_tree=False,\n", + " )\n", " for key, value in elements_dict.items():\n", " pvalue_dict[key].append(value)\n", " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", @@ -845,13 +3094,13 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "id": "7c99ce8c-a32d-447b-9dd2-85c8d310239f", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -870,19 +3119,21 @@ " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", " ax.legend()\n", - "fig.suptitle(\"Linear ANCOVA model with Coleman Forest (Permutation per tree)\")\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Original Coleman method)\"\n", + ")\n", "fig.tight_layout()" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "67846a66-1817-46c8-9ccc-5281773c4f92", "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -901,7 +3152,9 @@ " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", " ax.legend()\n", - "fig.suptitle(\"Linear ANCOVA model with Coleman Forest\")\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with Coleman Forest (Permute per tree, but not sample separate dataset)\"\n", + ")\n", "fig.tight_layout()" ] }, @@ -968,7 +3221,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "a2aed8f0-1230-4128-ad77-d84764c28d0d", "metadata": { "scrolled": true @@ -1379,7 +3632,55 @@ "(125, 600, 1) (125, 600, 1)\n", "X1: 1.0\n", "(125, 600, 1) (125, 600, 1)\n", - "X500: 1.0\n" + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n", + "(125, 600, 1) 
(125, 600, 1)\n", + "X2: 0.004975124378109453\n", + "(125, 600, 1) (125, 600, 1)\n", + "X1: 1.0\n", + "(125, 600, 1) (125, 600, 1)\n", + "X500: 0.004975124378109453\n" ] } ], @@ -1403,30 +3704,129 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "d3e21945-92b3-4ccc-8f29-b44f67d9cf33", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n" + ] + } + ], "source": [ "print(\"done\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "b2bced31-0367-48a8-88e1-0afd6a60173f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X2X1X500sigma_factor
00.0049750.0049751.0000000.01
10.0049750.0049750.0049750.01
20.0049750.0049750.0049750.01
30.0049750.0049751.0000000.01
41.0000001.0000000.0049750.01
\n", + "
" + ], + "text/plain": [ + " X2 X1 X500 sigma_factor\n", + "0 0.004975 0.004975 1.000000 0.01\n", + "1 0.004975 0.004975 0.004975 0.01\n", + "2 0.004975 0.004975 0.004975 0.01\n", + "3 0.004975 0.004975 1.000000 0.01\n", + "4 1.000000 1.000000 0.004975 0.01" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "display(df.head())" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "c4dbdaf1-9af7-4e6d-83b6-a9cabc18dc91", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "fig, axs = plt.subplots(3, 1, figsize=(8, 6), sharey=True, sharex=True)\n", "axs = axs.flatten()\n", diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index f0f41dc9a..d2c4ca4d4 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -513,9 +513,7 @@ def _statistic( metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric] rng = np.random.default_rng(self.random_state) - posterior_arr = np.full( - (self.n_estimators, self.n_samples_test_, estimator.n_outputs_), np.nan - ) + posterior_arr = np.full((self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan) if self.permute_per_tree: # now initialize posterior array as (n_trees, n_samples_test, n_outputs) for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): @@ -526,18 +524,6 @@ def _statistic( # Fill test set posteriors & set rest NaN posterior_arr[idx, indices_test, :] = y_pred # posterior - - # determine if there are any nans in the final posterior array - # Average all posteriors (n_samples_test, n_outputs) - posterior_forest = np.nanmean(posterior_arr, axis=0) - - # # Find the row indices with NaN values in any column - nonnan_indices = np.where(~np.isnan(posterior_forest).any(axis=1))[0] - samples = nonnan_indices - - # # Ignore all NaN values (samples not tested) - y_true_final = y[nonnan_indices, :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] @@ -556,16 +542,31 @@ def _statistic( ) X_train[:, covariate_index] = X_train[index_arr, covariate_index] + if self._type_of_target_ == "binary": + y_train = y_train.ravel() estimator.fit(X_train, y_train) # construct posterior array for all trees (n_trees, n_samples_test, n_outputs) for itree, tree in enumerate(estimator.estimators_): - posterior_arr[itree, ...] 
= tree.predict(X_test) + posterior_arr[itree, indices_test, ...] = tree.predict(X_test).reshape( + -1, tree.n_outputs_ + ) # set variables to compute metric samples = indices_test y_true_final = y_test + # determine if there are any nans in the final posterior array + temp_posterior_forest = np.nanmean(posterior_arr, axis=0) + nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] + + # Find the row indices with NaN values in any column + samples = nonnan_indices + + # Ignore all NaN values (samples not tested) + y_true_final = y[(nonnan_indices), :] + posterior_arr = posterior_arr[:, (nonnan_indices), :] + # Average all posteriors (n_samples_test, n_outputs) to compute the statistic posterior_forest = np.nanmean(posterior_arr, axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) From 39aef2ab816e85746291c6c478a96028701a3481 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 2 Oct 2023 13:49:45 -0400 Subject: [PATCH 51/70] Add some todos and fixes from quick call w/ sambit/hao Signed-off-by: Adam Li --- sktree/__init__.py | 10 ++++++++++ sktree/ensemble/_honest_forest.py | 13 ++++++++----- sktree/stats/forestht.py | 12 ++++++++++-- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/sktree/__init__.py b/sktree/__init__.py index c6af80ea0..87de95f28 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -43,6 +43,12 @@ ExtraTreesClassifier, ExtraTreesRegressor, ) + from ._lib.sklearn.tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, + ) from .neighbors import NearestNeighborsMetaEstimator from .ensemble import ExtendedIsolationForest from .ensemble._unsupervised_forest import ( @@ -85,4 +91,8 @@ "ExtraTreesClassifier", "ExtraTreesRegressor", "ExtendedIsolationForest", + "DecisionTreeClassifier", + "DecisionTreeRegressor", + "ExtraTreeClassifier", + "ExtraTreeRegressor", ] diff --git a/sktree/ensemble/_honest_forest.py 
b/sktree/ensemble/_honest_forest.py index 5ab5eca64..63249f179 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -15,6 +15,9 @@ DTYPE = _sklearn_tree.DTYPE +# TODO: throw error and check if sklearn.tree.BaseDecisionTree, +# or sktree.tree.BaseDecisionTree (i.e. fork of sklearn.tree.BaseDecisionTree) + class HonestForestClassifier(ForestClassifier): """ @@ -449,12 +452,12 @@ def _predict_proba(self, X, indices=None, impute_missing=None): posteriors = [ np.zeros((X.shape[0], j), dtype=np.float64) for j in np.atleast_1d(self.n_classes_) ] - lock = threading.Lock() - if indices is None: indices = [None] * self.n_estimators + + lock = threading.Lock() Parallel(n_jobs=n_jobs, verbose=self.verbose, require="sharedmem")( - delayed(_accumulate_prediction)(tree, X, posteriors, lock, idx) + delayed(_accumulate_prediction)(tree.predict_proba, X, posteriors, lock, idx) for tree, idx in zip(self.estimators_, indices) ) @@ -579,7 +582,7 @@ def get_leaf_node_samples(self, X): return self.estimator_.get_leaf_node_samples(X) -def _accumulate_prediction(tree, X, out, lock, indices=None): +def _accumulate_prediction(predict, X, out, lock, indices=None): """ See https://github.com/scikit-learn/scikit-learn/blob/ 95119c13af77c76e150b753485c662b7c52a41a2/sklearn/ensemble/_forest.py#L460 @@ -590,7 +593,7 @@ def _accumulate_prediction(tree, X, out, lock, indices=None): if indices is None: indices = np.arange(X.shape[0]) - proba = tree.predict_proba(X[indices], check_input=False) + proba = predict(X[indices], check_input=False) with lock: if len(out) == 1: diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index d2c4ca4d4..7b8ad4f48 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -55,6 +55,12 @@ def __init__( def n_estimators(self): return self.estimator_.n_estimators + @property + def n_classes_(self): + return self.estimator_.n_classes_ + + # tODO: replace all instances of estimator.n_classes_ with 
self.n_classes_ + def reset(self): class_attributes = dir(type(self)) instance_attributes = dir(self) @@ -204,7 +210,7 @@ def statistic( self.estimator_ = self._get_estimator() estimator = self.estimator_ else: - self.permuted_estimator_ = clone(self.estimator_) + self.permuted_estimator_ = self._get_estimator() estimator = self.permuted_estimator_ # Infer type of target y @@ -313,6 +319,8 @@ def test( """ X, y, covariate_index = self._check_input(X, y, covariate_index) + # TODO: need to add a more robust check to ensure that the estimator is correct when + # refitting if self._n_samples_ is None: # first compute the test statistic on the un-permuted data observe_stat, observe_posteriors, observe_samples = self.statistic( @@ -497,7 +505,7 @@ def _get_estimator(self): raise RuntimeError(f"Estimator must be a ForestRegressor, got {type(self.estimator)}") else: estimator_ = self.estimator - return estimator_ + return clone(estimator_) def _statistic( self, From 48d889b4536d6cdb7c18f6822decacae683a2c34 Mon Sep 17 00:00:00 2001 From: Sambit Panda Date: Mon, 2 Oct 2023 21:11:57 -0400 Subject: [PATCH 52/70] set covariate_index to None by default and change self.n_classes --- sktree/stats/forestht.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 7b8ad4f48..682313e2e 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -275,7 +275,7 @@ def test( self, X, y, - covariate_index: ArrayLike, + covariate_index: ArrayLike = None, metric: str = "mse", n_repeats: int = 1000, return_posteriors: bool = False, @@ -725,9 +725,7 @@ def _statistic( if predict_posteriors: # now initialize posterior array as (n_trees, n_samples_test, n_classes) - posterior_arr = np.full( - (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan - ) + posterior_arr = np.full((self.n_estimators, self._n_samples_, self.n_classes_), np.nan) else: # now initialize posterior array as (n_trees, 
n_samples_test, n_outputs) posterior_arr = np.full( From bbb5c7c0f9d470b0114c0eb3c25673cfd72de1b8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 2 Oct 2023 21:14:15 -0400 Subject: [PATCH 53/70] Fix a few issues and consolidate todos Signed-off-by: Adam Li --- doc/api.rst | 9 ++ sktree/__init__.py | 10 --- sktree/ensemble/_honest_forest.py | 7 +- sktree/stats/forestht.py | 14 ++- sktree/tree/__init__.py | 9 +- sktree/tree/_classes.py | 120 ++++++++++++++++++++++++++ sktree/tree/_honest_tree.py | 28 +++--- sktree/tree/tests/test_honest_tree.py | 11 +++ 8 files changed, 167 insertions(+), 41 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index ecaa78ecb..2a74a7a01 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -34,6 +34,15 @@ to the scikit-learn estimators. ExtraTreesClassifier ExtraTreesRegressor +.. currentmodule:: sktree.tree +.. autosummary:: + :toctree: generated/ + + DecisionTreeClassifier + DecisionTreeRegressor + ExtraTreeClassifier + ExtraTreeRegressor + Supervised ---------- Decision-tree models are traditionally implemented with axis-aligned splits and diff --git a/sktree/__init__.py b/sktree/__init__.py index 87de95f28..c6af80ea0 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -43,12 +43,6 @@ ExtraTreesClassifier, ExtraTreesRegressor, ) - from ._lib.sklearn.tree import ( - DecisionTreeClassifier, - DecisionTreeRegressor, - ExtraTreeClassifier, - ExtraTreeRegressor, - ) from .neighbors import NearestNeighborsMetaEstimator from .ensemble import ExtendedIsolationForest from .ensemble._unsupervised_forest import ( @@ -91,8 +85,4 @@ "ExtraTreesClassifier", "ExtraTreesRegressor", "ExtendedIsolationForest", - "DecisionTreeClassifier", - "DecisionTreeRegressor", - "ExtraTreeClassifier", - "ExtraTreeRegressor", ] diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 63249f179..e40773594 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -15,9 +15,6 @@ DTYPE = 
_sklearn_tree.DTYPE -# TODO: throw error and check if sklearn.tree.BaseDecisionTree, -# or sktree.tree.BaseDecisionTree (i.e. fork of sklearn.tree.BaseDecisionTree) - class HonestForestClassifier(ForestClassifier): """ @@ -190,7 +187,9 @@ class HonestForestClassifier(ForestClassifier): tree_estimator : object, default=None Type of decision tree classifier to use. By default `None`, which - defaults to :class:`sklearn.tree.DecisionTreeClassifier`. + defaults to `sktree.tree.DecisionTreeClassifier`. Note + that one MUST use trees imported from the `sktree.tree` + API namespace rather than from `sklearn.tree`. Attributes ---------- diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 7b8ad4f48..8874ccaf5 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -50,17 +50,12 @@ def __init__( self._covariate_index_cache_ = None self._type_of_target_ = None self.n_features_in_ = None + self._is_fitted = False @property def n_estimators(self): return self.estimator_.n_estimators - @property - def n_classes_(self): - return self.estimator_.n_classes_ - - # tODO: replace all instances of estimator.n_classes_ with self.n_classes_ - def reset(self): class_attributes = dir(type(self)) instance_attributes = dir(self) @@ -75,6 +70,7 @@ def reset(self): self._type_of_target_ = None self._metric = None self.n_features_in_ = None + self._is_fitted = False def _get_estimators_indices(self): indices = np.arange(self._n_samples_, dtype=int) @@ -319,9 +315,7 @@ def test( """ X, y, covariate_index = self._check_input(X, y, covariate_index) - # TODO: need to add a more robust check to ensure that the estimator is correct when - # refitting - if self._n_samples_ is None: + if self._is_fitted: # first compute the test statistic on the un-permuted data observe_stat, observe_posteriors, observe_samples = self.statistic( X, @@ -585,6 +579,7 @@ def _statistic( self.y_true_final_ = y_true_final self.posterior_final_ = posterior_arr self.stat_ = stat + 
self._is_fitted = True if return_posteriors: return stat, posterior_arr, samples @@ -815,6 +810,7 @@ def _statistic( self.y_true_final_ = y_true_final self.posterior_final_ = posterior_arr self.stat_ = stat + self._is_fitted = True if return_posteriors: return stat, posterior_arr, samples diff --git a/sktree/tree/__init__.py b/sktree/tree/__init__.py index be8baf5db..1ed18f3f0 100644 --- a/sktree/tree/__init__.py +++ b/sktree/tree/__init__.py @@ -1,4 +1,9 @@ -from .._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from .._lib.sklearn.tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, +) from ._classes import ( ExtraObliqueDecisionTreeClassifier, ExtraObliqueDecisionTreeRegressor, @@ -25,4 +30,6 @@ "HonestTreeClassifier", "DecisionTreeClassifier", "DecisionTreeRegressor", + "ExtraTreeClassifier", + "ExtraTreeRegressor", ] diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py index 1fd839ab7..24c25afcf 100644 --- a/sktree/tree/_classes.py +++ b/sktree/tree/_classes.py @@ -704,6 +704,20 @@ class ObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier): ``(max_features, n_features)``. Thus this value must always be less than ``n_features`` in order to be valid. + ccp_alpha : non-negative float, default=0.0 + Not used. + + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. 
+ Attributes ---------- classes_ : ndarray of shape (n_classes,) or list of ndarray @@ -825,6 +839,9 @@ def __init__( min_impurity_decrease=0.0, class_weight=None, feature_combinations=None, + ccp_alpha=None, + store_leaf_values=False, + monotonic_cst=None, ): super().__init__( criterion=criterion, @@ -838,6 +855,9 @@ def __init__( class_weight=class_weight, random_state=random_state, min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, ) self.feature_combinations = feature_combinations @@ -1080,6 +1100,20 @@ class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): ``(max_features, n_features)``. Thus this value must always be less than ``n_features`` in order to be valid. + ccp_alpha : non-negative float, default=0.0 + Not used. + + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. + Attributes ---------- feature_importances_ : ndarray of shape (n_features,) @@ -1186,6 +1220,9 @@ def __init__( max_leaf_nodes=None, min_impurity_decrease=0.0, feature_combinations=None, + ccp_alpha=None, + store_leaf_values=False, + monotonic_cst=None, ): super().__init__( criterion=criterion, @@ -1198,6 +1235,9 @@ def __init__( max_leaf_nodes=max_leaf_nodes, random_state=random_state, min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, ) self.feature_combinations = feature_combinations @@ -1472,6 +1512,20 @@ class PatchObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier) as follows: for every patch that is sampled, the feature weights over the entire patch is summed and normalizes the patch. 
+ ccp_alpha : non-negative float, default=0.0 + Not used. + + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. + Attributes ---------- classes_ : ndarray of shape (n_classes,) or list of ndarray @@ -1574,6 +1628,9 @@ def __init__( data_dims=None, boundary=None, feature_weight=None, + ccp_alpha=None, + store_leaf_values=False, + monotonic_cst=None, ): super().__init__( criterion=criterion, @@ -1587,6 +1644,9 @@ def __init__( class_weight=class_weight, random_state=random_state, min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, ) self.min_patch_dims = min_patch_dims @@ -1913,6 +1973,20 @@ class PatchObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): as follows: for every patch that is sampled, the feature weights over the entire patch is summed and normalizes the patch. + ccp_alpha : non-negative float, default=0.0 + Not used. + + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. 
+ Attributes ---------- feature_importances_ : ndarray of shape (n_features,) @@ -2018,6 +2092,9 @@ def __init__( data_dims=None, boundary=None, feature_weight=None, + ccp_alpha=None, + store_leaf_values=False, + monotonic_cst=None, ): super().__init__( criterion=criterion, @@ -2030,6 +2107,9 @@ def __init__( max_leaf_nodes=max_leaf_nodes, random_state=random_state, min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, ) self.min_patch_dims = min_patch_dims @@ -2363,6 +2443,20 @@ class ExtraObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier) ``(max_features, n_features)``. Thus this value must always be less than ``n_features`` in order to be valid. + ccp_alpha : non-negative float, default=0.0 + Not used. + + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. + Attributes ---------- classes_ : ndarray of shape (n_classes,) or list of ndarray @@ -2488,6 +2582,9 @@ def __init__( min_impurity_decrease=0.0, class_weight=None, feature_combinations=None, + ccp_alpha=None, + store_leaf_values=False, + monotonic_cst=None, ): super().__init__( criterion=criterion, @@ -2501,6 +2598,9 @@ def __init__( class_weight=class_weight, random_state=random_state, min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, ) self.feature_combinations = feature_combinations @@ -2752,6 +2852,20 @@ class ExtraObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): ``(max_features, n_features)``. Thus this value must always be less than ``n_features`` in order to be valid. + ccp_alpha : non-negative float, default=0.0 + Not used. 
+ + store_leaf_values : bool, default=False + Whether to store the leaf values. + + monotonic_cst : array-like of int of shape (n_features), default=None + Indicates the monotonicity constraint to enforce on each feature. + - 1: monotonic increase + - 0: no constraint + - -1: monotonic decrease + + Not used. + Attributes ---------- feature_importances_ : ndarray of shape (n_features,) @@ -2859,6 +2973,9 @@ def __init__( max_leaf_nodes=None, min_impurity_decrease=0.0, feature_combinations=None, + ccp_alpha=None, + store_leaf_values=False, + monotonic_cst=None, ): super().__init__( criterion=criterion, @@ -2871,6 +2988,9 @@ def __init__( max_leaf_nodes=max_leaf_nodes, random_state=random_state, min_impurity_decrease=min_impurity_decrease, + ccp_alpha=ccp_alpha, + store_leaf_values=store_leaf_values, + monotonic_cst=monotonic_cst, ) self.feature_combinations = feature_combinations diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 8e0f2321d..7d534ddb9 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -1,11 +1,10 @@ # Authors: Ronan Perry, Sambit Panda, Haoyin Xu # Adopted from: https://github.com/neurodata/honest-forests -import inspect - import numpy as np from sklearn.base import ClassifierMixin, MetaEstimatorMixin, _fit_context, clone from sklearn.ensemble._base import _set_random_states +from sklearn.tree._classes import BaseDecisionTree as skBaseDecisionTree from sklearn.utils.multiclass import _check_partial_fit_first_call, check_classification_targets from sklearn.utils.validation import check_is_fitted, check_X_y @@ -152,9 +151,10 @@ class HonestTreeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseDecisionTree Read more in the :ref:`User Guide `. tree_estimator : object, default=None - Instatiated tree of type BaseDecisionTree. + Instatiated tree of type BaseDecisionTree from sktree. If None, then DecisionTreeClassifier with default parameters will - be used. + be used. 
Note that one MUST use trees imported from the `sktree.tree` + API namespace rather than from `sklearn.tree`. honest_fraction : float, default=0.5 Fraction of training samples used for estimates in the leaves. The @@ -555,6 +555,10 @@ def _fit( store_leaf_values=self.store_leaf_values, ) else: + # we throw an error if the user is using trees from sklearn:main + if isinstance(self.tree_estimator, skBaseDecisionTree): + raise RuntimeError("Instead of using sklearn.tree, use trees import from sktree.") + # XXX: maybe error out if the tree_estimator is already fitted self.estimator_ = clone(self.tree_estimator) self.estimator_.set_params( @@ -570,22 +574,12 @@ def _fit( class_weight=self.class_weight, random_state=self.random_state, min_impurity_decrease=self.min_impurity_decrease, + ccp_alpha=self.ccp_alpha, + monotonic_cst=self.monotonic_cst, + store_leaf_values=self.store_leaf_values, ) ) - # TODO: refactor oblique trees to have these parameters by default, but not used - init_signature = inspect.signature(self.estimator_.__init__) - if "ccp_alpha" in init_signature.parameters: - self.estimator_.set_params(**dict(ccp_alpha=self.ccp_alpha)) - if "store_leaf_values" in init_signature.parameters: - self.estimator_.set_params( - **dict( - store_leaf_values=self.store_leaf_values, - ) - ) - if "monotonic_cst" in init_signature.parameters: - self.estimator_.set_params(**dict(monotonic_cst=self.monotonic_cst)) - if self.random_state is not None: _set_random_states(self.estimator_, self.random_state) diff --git a/sktree/tree/tests/test_honest_tree.py b/sktree/tree/tests/test_honest_tree.py index cc130a14a..92dc99845 100644 --- a/sktree/tree/tests/test_honest_tree.py +++ b/sktree/tree/tests/test_honest_tree.py @@ -2,6 +2,7 @@ import pytest from sklearn import datasets from sklearn.metrics import accuracy_score +from sklearn.tree import DecisionTreeClassifier as skDecisionTreeClassifier from sklearn.utils.estimator_checks import parametrize_with_checks from 
sktree._lib.sklearn.tree import DecisionTreeClassifier @@ -117,3 +118,13 @@ def test_sklearn_compatible_estimator(estimator, check): if check.func.__name__ in ["check_class_weight_classifiers", "check_classifier_multioutput"]: pytest.skip() check(estimator) + + +def test_error_with_sklearn_trees(): + X = np.ones((20, 4)) + X[10:] *= -1 + y = [0] * 10 + [1] * 10 + + with pytest.raises(RuntimeError, match="Instead of using sklearn.tree"): + clf = HonestTreeClassifier(tree_estimator=skDecisionTreeClassifier()) + clf.fit(X, y) From 5ad1b4482f2f650dc6af4f860a8c7a174c39d554 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 2 Oct 2023 21:32:01 -0400 Subject: [PATCH 54/70] Fix Signed-off-by: Adam Li --- sktree/stats/forestht.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 499db6e25..cd19e2571 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -315,7 +315,7 @@ def test( """ X, y, covariate_index = self._check_input(X, y, covariate_index) - if self._is_fitted: + if not self._is_fitted: # first compute the test statistic on the un-permuted data observe_stat, observe_posteriors, observe_samples = self.statistic( X, @@ -720,7 +720,9 @@ def _statistic( if predict_posteriors: # now initialize posterior array as (n_trees, n_samples_test, n_classes) - posterior_arr = np.full((self.n_estimators, self._n_samples_, self.n_classes_), np.nan) + posterior_arr = np.full( + (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan + ) else: # now initialize posterior array as (n_trees, n_samples_test, n_outputs) posterior_arr = np.full( From 80ada6891bab655af0829d66ecef08fe7ab5ee95 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Mon, 2 Oct 2023 21:40:04 -0400 Subject: [PATCH 55/70] Add clone to get estimators Signed-off-by: Adam Li --- sktree/stats/forestht.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py 
index cd19e2571..f198cec19 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -697,7 +697,7 @@ def _get_estimator(self): else: # self.estimator is an instance of a ForestEstimator estimator_ = self.estimator - return estimator_ + return clone(estimator_) def _statistic( self, From ff377403686b38f9d1e065d06e37b5ffbf916338 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 3 Oct 2023 10:04:19 -0400 Subject: [PATCH 56/70] ENH mark all default tests as MI and correct posterior return parameter --- sktree/stats/forestht.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index f198cec19..f41b67157 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -152,7 +152,7 @@ def statistic( X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = None, - metric="mse", + metric="mi", return_posteriors: bool = False, check_input: bool = True, **metric_kwargs, @@ -272,9 +272,9 @@ def test( X, y, covariate_index: ArrayLike = None, - metric: str = "mse", + metric: str = "mi", n_repeats: int = 1000, - return_posteriors: bool = False, + return_posteriors: bool = True, **metric_kwargs, ): """Perform hypothesis test using Coleman method. @@ -302,7 +302,7 @@ def test( n_repeats : int, optional Number of times to sample the null distribution, by default 1000. return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. + Whether or not to return the posteriors, by default True. **metric_kwargs : dict, optional Additional keyword arguments to pass to the metric function. 
@@ -322,7 +322,7 @@ def test( y, covariate_index=None, metric=metric, - return_posteriors=True, + return_posteriors=return_posteriors, check_input=False, **metric_kwargs, ) @@ -337,7 +337,7 @@ def test( y, covariate_index=covariate_index, metric=metric, - return_posteriors=True, + return_posteriors=return_posteriors, check_input=False, **metric_kwargs, ) From aed91798acaefac0773a4dc12d430977d79bc023 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 3 Oct 2023 10:29:24 -0400 Subject: [PATCH 57/70] FIX unify all variable names so posteriors are not saved twice Add permute_stat to class variable --- sktree/stats/forestht.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index f41b67157..74a4350d3 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -327,9 +327,9 @@ def test( **metric_kwargs, ) else: - observe_samples = self.samples_ - observe_posteriors = self.posterior_final_ - observe_stat = self.stat_ + observe_samples = self.observe_samples_ + observe_posteriors = self.observe_posteriors_ + observe_stat = self.observe_stat_ # next permute the data permute_stat, permute_posteriors, permute_samples = self.statistic( @@ -341,6 +341,7 @@ def test( check_input=False, **metric_kwargs, ) + self.permute_stat_ = permute_stat # Note: at this point, both `estimator` and `permuted_estimator_` should # have been fitted already, so we can now compute on the null by resampling @@ -452,7 +453,8 @@ class FeatureImportanceForestRegressor(BaseForestHT): y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. - posterior_final_ : ArrayLike of shape (n_estimators, n_samples_final) + observe_posteriors_ : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or + (n_estimators, n_samples_final, n_classes) The predicted posterior probabilities of the samples used in the final test. 
null_dist_ : ArrayLike of shape (n_repeats,) @@ -575,10 +577,10 @@ def _statistic( if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) # arrays of y and predicted posterior - self.samples_ = samples + self.observe_samples_ = samples self.y_true_final_ = y_true_final - self.posterior_final_ = posterior_arr - self.stat_ = stat + self.observe_posteriors_ = posterior_arr + self.observe_stat_ = stat self._is_fitted = True if return_posteriors: @@ -648,8 +650,8 @@ class FeatureImportanceForestClassifier(BaseForestHT): y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. - posterior_final_ : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final) + observe_posteriors_ : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or + (n_estimators, n_samples_final, n_classes) The predicted posterior probabilities of the samples used in the final test. 
null_dist_ : ArrayLike of shape (n_repeats,) @@ -806,10 +808,10 @@ def _statistic( if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) # arrays of y and predicted posterior - self.samples_ = samples + self.observe_samples_ = samples self.y_true_final_ = y_true_final - self.posterior_final_ = posterior_arr - self.stat_ = stat + self.observe_posteriors_ = posterior_arr + self.observe_stat_ = stat self._is_fitted = True if return_posteriors: From c7164403f5c715241dad8461538babaad25e1748 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 3 Oct 2023 13:22:21 -0400 Subject: [PATCH 58/70] Add additional testing Signed-off-by: Adam Li --- .github/workflows/main.yml | 114 +++++++++++++++++++++++++++- .spin/cmds.py | 45 +++++------ doc/conf.py | 1 + pyproject.toml | 6 +- sktree/stats/forestht.py | 32 +++++++- sktree/stats/tests/test_forestht.py | 35 +++++++++ sktree/tree/_classes.py | 12 +-- 7 files changed, 206 insertions(+), 39 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a329c4362..12182c95f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -116,7 +116,116 @@ jobs: ./spin --help ./spin coverage --help ./spin test --help - ./spin coverage + ./spin test + cp $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/coverage.xml ./coverage.xml + + - name: debug + run: | + ls $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/ + echo "Okay..." 
+ ls $PWD/build + ls ./ + + - name: Save build + uses: actions/upload-artifact@v3 + with: + name: sktree-build + path: $PWD/build + + build_and_test_slow: + name: Meson build ${{ matrix.os }} - py${{ matrix.python-version }} + timeout-minutes: 20 + needs: [build_and_test] + strategy: + fail-fast: false + matrix: + os: [ubuntu-22.04] + python-version: ["3.11"] + poetry-version: [1.5.0] + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash + env: + # to make sure coverage/test command builds cleanly + FORCE_SUBMODULE: True + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v4.6.1 + with: + python-version: ${{ matrix.python-version }} + architecture: "x64" + cache: "pip" + cache-dependency-path: "requirements.txt" + + - name: show-gcc + run: | + gcc --version + + - name: Install Ccache for MacOSX + if: ${{ matrix.os == 'macos-latest'}} + run: | + brew install ccache + + - name: Install packages for Ubuntu + if: ${{ matrix.os == 'ubuntu-22.04'}} + run: | + sudo apt-get update + sudo apt-get install -y libopenblas-dev libatlas-base-dev liblapack-dev gfortran libgmp-dev libmpfr-dev libsuitesparse-dev ccache libmpc-dev + + - name: Install Python packages + run: | + python -m pip install -r build_requirements.txt + python -m pip install spin + python -m pip install -r test_requirements.txt + + - name: Prepare compiler cache + id: prep-ccache + shell: bash + run: | + mkdir -p "${CCACHE_DIR}" + echo "dir=$CCACHE_DIR" >> $GITHUB_OUTPUT + NOW=$(date -u +"%F-%T") + echo "timestamp=${NOW}" >> $GITHUB_OUTPUT + + - name: Setup compiler cache + uses: actions/cache@v3 + id: cache-ccachev1 + # Reference: https://docs.github.com/en/actions/guides/caching-dependencies-to-speed-up-workflows#matching-a-cache-key + # NOTE: The caching strategy is modeled in a way that it will always have a unique cache key for each workflow run + # (even if the same workflow is run multiple times). 
The restore keys are not unique and for a partial match, they will + # return the most recently created cache entry, according to the GitHub Action Docs. + with: + path: ${{ steps.prep-ccache.outputs.dir }} + # Restores ccache from either a previous build on this branch or on main + key: ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux-${{ steps.prep-ccache.outputs.timestamp }} + # This evaluates to `Linux Tests-3.9-ccache-linux-` which is not unique. As the CI matrix is expanded, this will + # need to be updated to be unique so that the cache is not restored from a different job altogether. + restore-keys: | + ${{ github.workflow }}-${{ matrix.python-version }}-ccache-linux- + + - name: Setup build and install scikit-tree + run: | + ./spin build -j 2 --forcesubmodule + + - name: Ccache performance + shell: bash -l {0} + run: ccache -s + + - name: build-path + run: | + echo "$PWD/build-install/" + export INSTALLED_PATH=$PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages + + - name: Run unit tests and coverage + run: | + ./spin --help + ./spin coverage --help + ./spin test --help + ./spin coverage -k "slowtest" cp $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/coverage.xml ./coverage.xml - name: debug @@ -127,7 +236,6 @@ jobs: ls ./ - name: Upload coverage stats to codecov - if: ${{ matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.10'}} uses: codecov/codecov-action@v3 with: # python spin goes into the INSTALLED path in order to run pytest @@ -146,7 +254,7 @@ jobs: release: name: Release runs-on: ubuntu-latest - needs: [build_and_test] + needs: [build_and_test_slow] if: startsWith(github.ref, 'refs/tags/') steps: - name: Checkout repository diff --git a/.spin/cmds.py b/.spin/cmds.py index 5b8585dde..b7f2cd863 100644 --- a/.spin/cmds.py +++ b/.spin/cmds.py @@ -1,5 +1,4 @@ import os -import shutil import subprocess import sys @@ -13,33 +12,27 @@ def get_git_revision_hash(submodule) -> str: 
@click.command() -@click.option("--build-dir", default="build", help="Build directory; default is `$PWD/build`") -@click.option("--clean", is_flag=True, help="Clean previously built docs before building") -@click.option("--noplot", is_flag=True, help="Build docs without plots") +@click.argument("slowtest", default=True) @click.pass_context -def docs(ctx, build_dir, clean=False, noplot=False): - """📖 Build documentation""" - if clean: - doc_dir = "./docs/_build" - if os.path.isdir(doc_dir): - print(f"Removing `{doc_dir}`") - shutil.rmtree(doc_dir) - - site_path = meson._get_site_packages() - if site_path is None: - print("No built scikit-tree found; run `./spin build` first.") - sys.exit(1) - - util.run(["pip", "install", "-q", "-r", "doc_requirements.txt"]) - - ctx.invoke(meson.docs) - - -@click.command() -@click.pass_context -def coverage(ctx): +def coverage(ctx, slowtest): """📊 Generate coverage report""" - pytest_args = ("-o", "python_functions=test_*", "sktree", "--cov=sktree", "--cov-report=xml") + if slowtest: + pytest_args = ( + "-o", + "python_functions=test_*", + "sktree", + "--cov=sktree", + "--cov-report=xml", + "-k slowtest", + ) + else: + pytest_args = ( + "-o", + "python_functions=test_*", + "sktree", + "--cov=sktree", + "--cov-report=xml", + ) ctx.invoke(meson.test, pytest_args=pytest_args) diff --git a/doc/conf.py b/doc/conf.py index fa949235e..33b43e88b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -240,6 +240,7 @@ "predict", "fit", "apply", + "TreeBuilder", } # validation diff --git a/pyproject.toml b/pyproject.toml index d032d2c5d..544710691 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -235,7 +235,7 @@ exclude = [ [tool.pytest.ini_options] minversion = '6.0' -addopts = '--durations 20 --junit-xml=junit-results.xml --verbose --ignore=sktree/_lib/' +addopts = '--durations 20 --junit-xml=junit-results.xml --verbose --ignore=sktree/_lib/ -k "not slowtest"' filterwarnings = [] [tool.coverage.run] @@ -267,7 +267,9 @@ Environments = [ 
'spin.cmds.meson.ipython', 'spin.cmds.meson.python', ] -Documentation = ['.spin/cmds.py:docs'] +Documentation = [ + 'spin.cmds.meson.docs' + ] Metrics = [ '.spin/cmds.py:coverage', '.spin/cmds.py:asv', diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 74a4350d3..5ce124174 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1,4 +1,4 @@ -from typing import Callable +from typing import Callable, Tuple import numpy as np from numpy.typing import ArrayLike @@ -28,6 +28,13 @@ class BaseForestHT(MetaEstimatorMixin): + observe_samples_: ArrayLike + observe_posteriors_: ArrayLike + observe_stat_: float + permute_samples_: ArrayLike + permute_posteriors_: ArrayLike + permute_stat_: float + def __init__( self, estimator=None, @@ -129,6 +136,13 @@ def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = if y.ndim != 2: y = y.reshape(-1, 1) + if covariate_index is not None: + if not isinstance(covariate_index, (list, tuple, np.ndarray)): + raise RuntimeError("covariate_index must be an iterable of integer indices") + else: + if not all(isinstance(idx, int) for idx in covariate_index): + raise RuntimeError("Not all covariate_index are integer indices") + if self._n_samples_ is not None and X.shape[0] != self._n_samples_: raise RuntimeError( f"X must have {self._n_samples_} samples, got {X.shape[0]}. " @@ -156,7 +170,7 @@ def statistic( return_posteriors: bool = False, check_input: bool = True, **metric_kwargs, - ): + ) -> Tuple[float, ArrayLike, ArrayLike]: """Compute the test statistic. 
Parameters @@ -503,6 +517,20 @@ def _get_estimator(self): estimator_ = self.estimator return clone(estimator_) + def statistic( + self, + X: ArrayLike, + y: ArrayLike, + covariate_index: ArrayLike = None, + metric="mse", + return_posteriors: bool = False, + check_input: bool = True, + **metric_kwargs, + ): + return super().statistic( + X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs + ) + def _statistic( self, estimator: ForestClassifier, diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 05aaae129..925c62f1f 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,6 +1,7 @@ import numpy as np import pytest from flaky import flaky +from joblib import Parallel, delayed from scipy.special import expit from sklearn import datasets @@ -58,6 +59,14 @@ def test_featureimportance_forest_permute_pertree(sample_dataset_per_tree): with pytest.raises(RuntimeError, match="Metric must be"): est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi") + # covariate index must be an iterable + with pytest.raises(RuntimeError, match="covariate_index must be an iterable"): + est.statistic(iris_X[:n_samples], iris_y[:n_samples], 0, metric="mi") + + # covariate index must be an iterable of ints + with pytest.raises(RuntimeError, match="Not all covariate_index"): + est.statistic(iris_X[:n_samples], iris_y[:n_samples], [0, 1.0], metric="mi") + def test_featureimportance_forest_errors(): permute_per_tree = False @@ -257,6 +266,7 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, @flaky(max_runs=2) +@pytest.mark.slowtest @pytest.mark.parametrize("criterion", ["gini", "entropy"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform"]) @pytest.mark.parametrize( @@ -377,3 +387,28 @@ def test_forestht_check_inputs(forest_hyppo): y_invalid = np.random.rand(X.shape[0]) with pytest.raises(RuntimeError, match="y must have type"): 
forest_hyppo.statistic(X, y_invalid) + + +def test_parallelization(): + """Test parallelization of training forests.""" + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + + def run_forest(covariate_index=None): + clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, + random_state=seed, + n_jobs=1, + ), + ) + obs_stat = clf.statistic(X, y, metric="mi") + perm_stat = clf.statistic(X, y, covariate_index=[covariate_index], metric="mi") + return obs_stat, perm_stat + + out = Parallel(n_jobs=1)( + delayed(run_forest)(covariate_index) for covariate_index in range(n_features) + ) + assert len(out) == n_features diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py index 24c25afcf..a9e00c6d0 100644 --- a/sktree/tree/_classes.py +++ b/sktree/tree/_classes.py @@ -839,7 +839,7 @@ def __init__( min_impurity_decrease=0.0, class_weight=None, feature_combinations=None, - ccp_alpha=None, + ccp_alpha=0.0, store_leaf_values=False, monotonic_cst=None, ): @@ -1220,7 +1220,7 @@ def __init__( max_leaf_nodes=None, min_impurity_decrease=0.0, feature_combinations=None, - ccp_alpha=None, + ccp_alpha=0.0, store_leaf_values=False, monotonic_cst=None, ): @@ -1628,7 +1628,7 @@ def __init__( data_dims=None, boundary=None, feature_weight=None, - ccp_alpha=None, + ccp_alpha=0.0, store_leaf_values=False, monotonic_cst=None, ): @@ -2092,7 +2092,7 @@ def __init__( data_dims=None, boundary=None, feature_weight=None, - ccp_alpha=None, + ccp_alpha=0.0, store_leaf_values=False, monotonic_cst=None, ): @@ -2582,7 +2582,7 @@ def __init__( min_impurity_decrease=0.0, class_weight=None, feature_combinations=None, - ccp_alpha=None, + ccp_alpha=0.0, store_leaf_values=False, monotonic_cst=None, ): @@ -2973,7 +2973,7 @@ def __init__( max_leaf_nodes=None, min_impurity_decrease=0.0, feature_combinations=None, - ccp_alpha=None, + ccp_alpha=0.0, store_leaf_values=False, 
monotonic_cst=None, ): From f6cb04b380eedc219df81dd6d02f5e5a58231a85 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 3 Oct 2023 13:44:22 -0400 Subject: [PATCH 59/70] Fix CI Signed-off-by: Adam Li --- .github/workflows/main.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 12182c95f..ea1f78ee8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -117,7 +117,6 @@ jobs: ./spin coverage --help ./spin test --help ./spin test - cp $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/coverage.xml ./coverage.xml - name: debug run: | @@ -133,7 +132,7 @@ jobs: path: $PWD/build build_and_test_slow: - name: Meson build ${{ matrix.os }} - py${{ matrix.python-version }} + name: Slow Meson build ${{ matrix.os }} - py${{ matrix.python-version }} timeout-minutes: 20 needs: [build_and_test] strategy: From 8df008d786e0d4ae5e5e6981d2551a12d256c8b0 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Tue, 3 Oct 2023 13:52:10 -0400 Subject: [PATCH 60/70] Adding parallelization test Signed-off-by: Adam Li --- sktree/stats/forestht.py | 3 +++ sktree/stats/tests/test_forestht.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 5ce124174..66bee9c15 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -819,6 +819,9 @@ def _statistic( ) # determine if there are any nans in the final posterior array + print(posterior_arr.shape) + print(posterior_arr) + print(np.nanmean(posterior_arr, axis=0).shape) temp_posterior_forest = np.nanmean(posterior_arr, axis=0) nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 925c62f1f..9b849ed22 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -389,7 +389,9 @@ def 
test_forestht_check_inputs(forest_hyppo): forest_hyppo.statistic(X, y_invalid) -def test_parallelization(): +@pytest.mark.parametrize("backend", ["loky", "threading"]) +@pytest.mark.parametrize("n_jobs", [1, -1]) +def test_parallelization(backend, n_jobs): """Test parallelization of training forests.""" n_samples = 100 n_features = 5 @@ -399,16 +401,14 @@ def test_parallelization(): def run_forest(covariate_index=None): clf = FeatureImportanceForestClassifier( estimator=HonestForestClassifier( - n_estimators=10, - random_state=seed, - n_jobs=1, + n_estimators=10, random_state=seed, n_jobs=n_jobs, honest_fraction=0.2 ), + test_size=0.5, ) - obs_stat = clf.statistic(X, y, metric="mi") - perm_stat = clf.statistic(X, y, covariate_index=[covariate_index], metric="mi") - return obs_stat, perm_stat + pvalue = clf.test(X, y, covariate_index=[covariate_index], metric="mi") + return pvalue - out = Parallel(n_jobs=1)( + out = Parallel(n_jobs=1, backend=backend)( delayed(run_forest)(covariate_index) for covariate_index in range(n_features) ) assert len(out) == n_features From 7964d9941952868e055fc77371634c436259340f Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Tue, 3 Oct 2023 20:07:48 -0400 Subject: [PATCH 61/70] FIX remove extra print statememts --- sktree/stats/forestht.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 66bee9c15..5ce124174 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -819,9 +819,6 @@ def _statistic( ) # determine if there are any nans in the final posterior array - print(posterior_arr.shape) - print(posterior_arr) - print(np.nanmean(posterior_arr, axis=0).shape) temp_posterior_forest = np.nanmean(posterior_arr, axis=0) nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] From 8b5a7d164585a06cb0e6bc98be8d05d094673d65 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 4 Oct 2023 08:54:45 -0400 Subject: [PATCH 62/70] Remove numpy nanmean warnings and 
also bug fix of some code (#133) * Update Signed-off-by: Adam Li * Fix submodule Signed-off-by: Adam Li * Possible change to might code Signed-off-by: Adam Li * Add fixes Signed-off-by: Adam Li * Fix style Signed-off-by: Adam Li --------- Signed-off-by: Adam Li --- .github/workflows/main.yml | 2 +- ...mpare_coleman_and_permutation_forest.ipynb | 2855 +---------------- ...t_MI_gigantic_hypothesis_testing_forest.py | 8 +- sktree/_lib/sklearn_fork | 2 +- sktree/stats/forestht.py | 89 +- sktree/stats/tests/test_forestht.py | 68 +- sktree/stats/utils.py | 82 +- 7 files changed, 283 insertions(+), 2823 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ea1f78ee8..a943e3d13 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -224,7 +224,7 @@ jobs: ./spin --help ./spin coverage --help ./spin test --help - ./spin coverage -k "slowtest" + ./spin coverage cp $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/coverage.xml ./coverage.xml - name: debug diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 8b5c34f46..3990764bf 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -55,9 +55,9 @@ " beta = 10.0\n", " sigma = 10.0 / sigma_factor\n", " n_samples = 2200\n", - " n_estimators = 125\n", + " n_estimators = 500\n", " test_size = 0.1\n", - " n_repeats = 500\n", + " n_repeats = 1000\n", " metric = \"mse\"\n", "\n", " rng = np.random.default_rng(seed)\n", @@ -134,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": 
{}, "outputs": [], @@ -152,7 +152,7 @@ " max_features = \"sqrt\"\n", " test_size = 1.0 / 6\n", " metric = \"mse\"\n", - " n_repeats = 200\n", + " n_repeats = 500\n", " permute_per_tree = True\n", " sample_dataset_per_tree = True\n", "\n", @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true @@ -249,2814 +249,213 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X1: 0.9940119760479041\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n" + "X6: 0.36726546906187624\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X6: 1.0\n" + "X2/7: 0.23952095808383234\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X2/7: 0.49101796407185627\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X1: 0.00998003992015968\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n" + "X6: 0.3532934131736527\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] - }, + } + ], + "source": [ + "pvalue_dict = defaultdict(list)\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "j_space = np.linspace(0.005, 2.25, 9)\n", + "\n", + "for sigma_factor in j_space:\n", + " for idx in range(5):\n", + " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "\n", + " elements_dict = linear_model_ancova(\n", + " sigma_factor,\n", + " new_seed,\n", + " permute_per_tree=False,\n", + " sample_dataset_per_tree=False,\n", + " )\n", + " for key, value in elements_dict.items():\n", + " pvalue_dict[key].append(value)\n", + " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "\n", + "df = pd.DataFrame(pvalue_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5f4eb53c-c82d-4770-836a-552b910dd736", + "metadata": {}, + "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "[autoreload of sktree.stats.forestht failed: Traceback (most recent call last):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", + " superreload(m, reload, self.old_objects)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 500, in superreload\n", + " update_generic(old_obj, new_obj)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 349, in update_class\n", + " if update_generic(old_obj, new_obj):\n", + " File 
\"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 309, in update_function\n", + " setattr(old, name, getattr(new, name))\n", + "ValueError: statistic() requires a code object with 1 free vars, not 0\n", + "]\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b470b569-4391-40e2-b7c4-a8439cc026c0", + "metadata": {}, + "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty 
slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " 
y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "pvalue_dict = defaultdict(list)\n", - "rng = np.random.default_rng(seed)\n", - "\n", - "j_space = np.linspace(0.005, 2.25, 9)\n", - "\n", - "for sigma_factor in j_space:\n", - " for idx in range(5):\n", - " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", "\n", - " elements_dict = linear_model_ancova(\n", - " sigma_factor,\n", - " new_seed,\n", - " permute_per_tree=True,\n", - " sample_dataset_per_tree=False,\n", - " )\n", - " for key, value in elements_dict.items():\n", - " pvalue_dict[key].append(value)\n", - " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", "\n", - "df = pd.DataFrame(pvalue_dict)" + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" ] }, { diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index 8408f4e03..fd4f84765 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -90,10 +90,10 @@ # computed as the proportion of samples in the null distribution that are less than the # observed test statistic. 
-n_estimators = 125 +n_estimators = 200 max_features = "sqrt" test_size = 0.2 -n_repeats = 500 +n_repeats = 1000 n_jobs = -1 est = FeatureImportanceForestClassifier( @@ -107,8 +107,8 @@ ), random_state=seed, test_size=test_size, - permute_per_tree=True, - sample_dataset_per_tree=True, + permute_per_tree=False, + sample_dataset_per_tree=False, ) # we test for the first feature set, which is important and thus should return a pvalue < 0.05 diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork index e2fee00aa..d9d1ea68f 160000 --- a/sktree/_lib/sklearn_fork +++ b/sktree/_lib/sklearn_fork @@ -1 +1 @@ -Subproject commit e2fee00aa461c21b8cfa59eb907d27972415c99b +Subproject commit d9d1ea68fde4f0bf90caff21dc26044ace3114ae diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 5ce124174..427de8251 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -23,6 +23,7 @@ POSTERIOR_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, + _non_nan_samples, train_tree, ) @@ -140,7 +141,7 @@ def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = if not isinstance(covariate_index, (list, tuple, np.ndarray)): raise RuntimeError("covariate_index must be an iterable of integer indices") else: - if not all(isinstance(idx, int) for idx in covariate_index): + if not all(isinstance(idx, (np.integer, int)) for idx in covariate_index): raise RuntimeError("Not all covariate_index are integer indices") if self._n_samples_ is not None and X.shape[0] != self._n_samples_: @@ -362,7 +363,7 @@ def test( # the posteriors and computing the test statistic on the resampled posteriors if self.sample_dataset_per_tree: metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[observe_samples, :], + y_test=y, y_pred_proba_normal=observe_posteriors, y_pred_proba_perm=permute_posteriors, metric=metric, @@ -375,10 +376,13 @@ def test( # there is only one train and test split, so we can just use that _, 
indices_test = self.train_test_samples_[0] y_test = y[indices_test, :] + y_pred_proba_normal = observe_posteriors[:, indices_test, :] + y_pred_proba_perm = permute_posteriors[:, indices_test, :] + metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, + y_pred_proba_normal=y_pred_proba_normal, + y_pred_proba_perm=y_pred_proba_perm, metric=metric, n_repeats=n_repeats, seed=self.random_state, @@ -588,19 +592,15 @@ def _statistic( samples = indices_test y_true_final = y_test - # determine if there are any nans in the final posterior array - temp_posterior_forest = np.nanmean(posterior_arr, axis=0) - nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] - - # Find the row indices with NaN values in any column - samples = nonnan_indices + # determine if there are any nans in the final posterior array, when + # averaged over the trees + samples = _non_nan_samples(posterior_arr) # Ignore all NaN values (samples not tested) - y_true_final = y[(nonnan_indices), :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] + y_true_final = y[(samples), :] # Average all posteriors (n_samples_test, n_outputs) to compute the statistic - posterior_forest = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr[:, (samples), :], axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) @@ -750,6 +750,7 @@ def _statistic( if predict_posteriors: # now initialize posterior array as (n_trees, n_samples_test, n_classes) + # XXX: currently assumes n_outputs_ == 1 posterior_arr = np.full( (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan ) @@ -818,19 +819,15 @@ def _statistic( f"AUC metric is not supported for {self._type_of_target_} targets." 
) - # determine if there are any nans in the final posterior array - temp_posterior_forest = np.nanmean(posterior_arr, axis=0) - nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] - - # Find the row indices with NaN values in any column - samples = nonnan_indices + # determine if there are any nans in the final posterior array, when + # averaged over the trees + samples = _non_nan_samples(posterior_arr) # Ignore all NaN values (samples not tested) - y_true_final = y[(nonnan_indices), :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] + y_true_final = y[(samples), :] # Average all posteriors (n_samples_test, n_outputs) to compute the statistic - posterior_forest = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr[:, (samples), :], axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: @@ -846,51 +843,3 @@ def _statistic( return stat, posterior_arr, samples return stat - - def statistic( - self, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mi", - return_posteriors: bool = False, - check_input: bool = True, - **metric_kwargs, - ): - """Compute the test statistic. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "mi", which computes Mutual Information. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - check_input : bool, optional - Whether or not to check the input, by default True. - **metric_kwargs : dict, optional - Additional keyword arguments to pass to the metric function. - - Returns - ------- - stat : float - The test statistic. 
- posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final), optional - If ``return_posteriors`` is True, then the posterior probabilities of the - samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` - if all samples are encountered in the test set of at least one tree in the - posterior computation. - samples : ArrayLike of shape (n_samples_final,), optional - The indices of the samples used in the final test. ``n_samples_final`` is - equal to ``n_samples`` if all samples are encountered in the test set of at - least one tree in the posterior computation. - """ - return super().statistic( - X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs - ) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 9b849ed22..20fc68e52 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,7 +1,11 @@ +import pickle +from pathlib import Path + import numpy as np import pytest from flaky import flaky from joblib import Parallel, delayed +from numpy.testing import assert_array_equal from scipy.special import expit from sklearn import datasets @@ -124,8 +128,8 @@ def test_featureimportance_forest_errors(): "sample_dataset_per_tree": True, }, 300, # n_samples - 500, # n_repeats - 0.1, # test_size + 1000, # n_repeats + 0.2, # test_size ], ], ) @@ -205,12 +209,12 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) n_jobs=-1, ), "random_state": seed, - "permute_per_tree": True, - "sample_dataset_per_tree": True, + "permute_per_tree": False, + "sample_dataset_per_tree": False, }, - 600, - 200, - 1.0 / 6, + 600, # n_samples + 1000, # n_repeats + 1.0 / 6, # test_size ], ], ) @@ -266,7 +270,6 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, @flaky(max_runs=2) -@pytest.mark.slowtest @pytest.mark.parametrize("criterion", ["gini", 
"entropy"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform"]) @pytest.mark.parametrize( @@ -408,7 +411,54 @@ def run_forest(covariate_index=None): pvalue = clf.test(X, y, covariate_index=[covariate_index], metric="mi") return pvalue - out = Parallel(n_jobs=1, backend=backend)( + out = Parallel(n_jobs=-1, backend=backend)( delayed(run_forest)(covariate_index) for covariate_index in range(n_features) ) assert len(out) == n_features + + +def test_pickle(tmpdir): + """Test that pickling works and preserves fitted attributes.""" + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + n_repeats = 1000 + + clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.2 + ), + test_size=0.5, + ) + stat, pvalue = clf.test(X, y, covariate_index=[1], metric="mi", n_repeats=n_repeats) + + with open(Path(tmpdir) / "clf.pkl", "wb") as fpath: + pickle.dump(clf, fpath) + + with open(Path(tmpdir) / "clf.pkl", "rb") as fpath: + clf_pickle = pickle.load(fpath) + + # recompute pvalue manually and compare + pickle_pvalue = ( + 1.0 + (clf_pickle.null_dist_ <= (clf_pickle.permute_stat_ - clf_pickle.observe_stat_)).sum() + ) / (1.0 + n_repeats) + assert pvalue == pickle_pvalue + assert clf_pickle.permute_stat_ - clf_pickle.observe_stat_ == stat + + attr_list = [ + "test_size", + "observe_samples_", + "y_true_final_", + "observe_posteriors_", + "observe_stat_", + "_is_fitted", + "permute_samples_", + "permute_posteriors_", + "permute_stat_", + "n_samples_test_", + "_n_samples_", + "_metric", + ] + for attr in attr_list: + assert_array_equal(getattr(clf, attr), getattr(clf_pickle, attr)) diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 30ce83ff8..d2f36c1e4 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -16,12 +16,12 @@ def _mutual_information(y_true: ArrayLike, 
y_pred_proba: ArrayLike) -> float: - """Compute estimate of mutual information. + """Compute estimate of mutual information for supervised classification setting. Parameters ---------- y_true : ArrayLike of shape (n_samples,) - _description_ + The true labels. y_pred_proba : ArrayLike of shape (n_samples, n_outputs) Posterior probabilities. @@ -41,21 +41,69 @@ def _mutual_information(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: return H_Y - H_YX +def _cond_entropy(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: + """Compute estimate of entropy for supervised classification setting. + + H(Y | X) + + Parameters + ---------- + y_true : ArrayLike of shape (n_samples,) + The true labels. Not used in computation of the entropy. + y_pred_proba : ArrayLike of shape (n_samples, n_outputs) + Posterior probabilities. + + Returns + ------- + float : + The estimated MI. + """ + if y_true.squeeze().ndim != 1: + raise ValueError(f"y_true must be 1d, not {y_true.shape}") + + # entropy averaged over n_samples + H_YX = np.mean(entropy(y_pred_proba, base=np.exp(1), axis=1)) + return H_YX + + METRIC_FUNCTIONS = { "mse": mean_squared_error, "mae": mean_absolute_error, "balanced_accuracy": balanced_accuracy_score, "auc": roc_auc_score, "mi": _mutual_information, + "cond_entropy": _cond_entropy, } -POSTERIOR_FUNCTIONS = ("mi", "auc") +POSTERIOR_FUNCTIONS = ("mi", "auc", "cond_entropy") POSITIVE_METRICS = ("mi", "auc", "balanced_accuracy") REGRESSOR_METRICS = ("mse", "mae") +def _non_nan_samples(posterior_arr: ArrayLike) -> ArrayLike: + """Determine which samples are not nan in the posterior tree array. + + Parameters + ---------- + posterior_arr : ArrayLike of shape (n_trees, n_samples, n_outputs) + The 3D posterior array from the forest. + + Returns + ------- + nonnan_indices : ArrayLike of shape (n_nonnan_samples,) + The indices of the samples that are not nan in the posterior array + along axis=1. 
+ """ + # Find the row indices with NaN values along the specified axis + nan_indices = np.isnan(posterior_arr).any(axis=2).all(axis=0) + + # Invert the boolean mask to get indices without NaN values + nonnan_indices = np.where(~nan_indices)[0] + return nonnan_indices + + def train_tree( tree: DecisionTreeClassifier, X: ArrayLike, @@ -218,16 +266,30 @@ def _compute_null_distribution_coleman( rng.shuffle(y_pred_ind_arr) # get random half of the posteriors from two sets of trees - first_forest_inds = y_pred_ind_arr[:n_samples_test] - second_forest_inds = y_pred_ind_arr[:n_samples_test] + first_forest_inds = y_pred_ind_arr[: n_estimators // 2] + second_forest_inds = y_pred_ind_arr[n_estimators // 2 :] + + # get random half of the posteriors as one forest + first_forest_pred = all_y_pred[first_forest_inds, ...] + second_forest_pred = all_y_pred[second_forest_inds, ...] + + # determine if there are any nans in the final posterior array, when + # averaged over the trees + first_forest_samples = _non_nan_samples(first_forest_pred) + second_forest_samples = _non_nan_samples(second_forest_pred) + + # todo: is this step necessary? 
+ non_nan_samples = np.intersect1d( + first_forest_samples, second_forest_samples, assume_unique=True + ) - # get random half of the posteriors - y_pred_first_half = np.nanmean(all_y_pred[first_forest_inds], axis=0) - y_pred_second_half = np.nanmean(all_y_pred[second_forest_inds], axis=0) + # now average the posteriors over the trees for the non-nan samples + y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0) + y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0) # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_test, y_pred_first_half) - second_half_metric = metric_func(y_test, y_pred_second_half) + first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half) + second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric From 3a2279adb5370c192768cf5d50059a1757641c1b Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 4 Oct 2023 09:04:23 -0400 Subject: [PATCH 63/70] Add fixes Signed-off-by: Adam Li --- .github/workflows/main.yml | 2 +- ...mpare_coleman_and_permutation_forest.ipynb | 2855 +---------------- ...t_MI_gigantic_hypothesis_testing_forest.py | 8 +- sktree/_lib/sklearn_fork | 2 +- sktree/stats/forestht.py | 92 +- sktree/stats/tests/test_forestht.py | 68 +- sktree/stats/utils.py | 82 +- 7 files changed, 283 insertions(+), 2826 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ea1f78ee8..a943e3d13 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -224,7 +224,7 @@ jobs: ./spin --help ./spin coverage --help ./spin test --help - ./spin coverage -k "slowtest" + ./spin coverage cp $PWD/build-install/usr/lib/python${{matrix.python-version}}/site-packages/coverage.xml ./coverage.xml - name: debug diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb 
b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 8b5c34f46..3990764bf 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 11, "id": "5e2d1279-fa4f-47ef-aa48-fac6d47159ad", "metadata": {}, "outputs": [], @@ -55,9 +55,9 @@ " beta = 10.0\n", " sigma = 10.0 / sigma_factor\n", " n_samples = 2200\n", - " n_estimators = 125\n", + " n_estimators = 500\n", " test_size = 0.1\n", - " n_repeats = 500\n", + " n_repeats = 1000\n", " metric = \"mse\"\n", "\n", " rng = np.random.default_rng(seed)\n", @@ -134,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "id": "3db4f740-afd9-413e-8089-a8245f2a0747", "metadata": {}, "outputs": [], @@ -152,7 +152,7 @@ " max_features = \"sqrt\"\n", " test_size = 1.0 / 6\n", " metric = \"mse\"\n", - " n_repeats = 200\n", + " n_repeats = 500\n", " permute_per_tree = True\n", " sample_dataset_per_tree = True\n", "\n", @@ -239,7 +239,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true @@ -249,2814 +249,213 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ 
- "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty 
slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X1: 0.9940119760479041\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n" + "X6: 0.36726546906187624\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X6: 1.0\n" + "X2/7: 0.23952095808383234\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X2/7: 0.49101796407185627\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X2/7: 1.0\n" + "X1: 0.00998003992015968\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 1.0\n" + "X6: 0.3532934131736527\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" + "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n" ] - }, + } + ], + "source": [ + "pvalue_dict = defaultdict(list)\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "j_space = np.linspace(0.005, 2.25, 9)\n", + "\n", + "for sigma_factor in j_space:\n", + " for idx in range(5):\n", + " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "\n", + " elements_dict = linear_model_ancova(\n", + " sigma_factor,\n", + " new_seed,\n", + " permute_per_tree=False,\n", + " sample_dataset_per_tree=False,\n", + " )\n", + " for key, value in elements_dict.items():\n", + " pvalue_dict[key].append(value)\n", + " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "\n", + "df = pd.DataFrame(pvalue_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5f4eb53c-c82d-4770-836a-552b910dd736", + "metadata": {}, + "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" + "[autoreload of sktree.stats.forestht failed: Traceback (most recent call last):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", + " superreload(m, reload, self.old_objects)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 500, in superreload\n", + " update_generic(old_obj, new_obj)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 349, in update_class\n", + " if update_generic(old_obj, new_obj):\n", + " File 
\"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 309, in update_function\n", + " setattr(old, name, getattr(new, name))\n", + "ValueError: statistic() requires a code object with 1 free vars, not 0\n", + "]\n" ] }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "b470b569-4391-40e2-b7c4-a8439cc026c0", + "metadata": {}, + "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": 
"stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": 
[ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty 
slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " 
y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - 
"text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - 
"/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n", - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "X1: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.001996007984031936\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/Documents/scikit-tree/sktree/stats/forestht.py:560: RuntimeWarning: Mean of empty slice\n", - " y_train = y_train.ravel()\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 1.0\n" - ] + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "pvalue_dict = defaultdict(list)\n", - "rng = np.random.default_rng(seed)\n", - "\n", - "j_space = np.linspace(0.005, 2.25, 9)\n", - "\n", - "for sigma_factor in j_space:\n", - " for idx in range(5):\n", - " new_seed = rng.integers(0, np.iinfo(np.uint32).max, dtype=np.uint32)\n", + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", "\n", - " elements_dict = linear_model_ancova(\n", - " sigma_factor,\n", - " new_seed,\n", - " permute_per_tree=True,\n", - " sample_dataset_per_tree=False,\n", - " )\n", - " for key, value in elements_dict.items():\n", - " pvalue_dict[key].append(value)\n", - " pvalue_dict[\"sigma_factor\"].append(sigma_factor)\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", "\n", - "df = pd.DataFrame(pvalue_dict)" + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" ] }, { diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index 8408f4e03..fd4f84765 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -90,10 +90,10 @@ # computed as the proportion of samples in the null distribution that are less than the # observed test statistic. 
-n_estimators = 125 +n_estimators = 200 max_features = "sqrt" test_size = 0.2 -n_repeats = 500 +n_repeats = 1000 n_jobs = -1 est = FeatureImportanceForestClassifier( @@ -107,8 +107,8 @@ ), random_state=seed, test_size=test_size, - permute_per_tree=True, - sample_dataset_per_tree=True, + permute_per_tree=False, + sample_dataset_per_tree=False, ) # we test for the first feature set, which is important and thus should return a pvalue < 0.05 diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork index e2fee00aa..d9d1ea68f 160000 --- a/sktree/_lib/sklearn_fork +++ b/sktree/_lib/sklearn_fork @@ -1 +1 @@ -Subproject commit e2fee00aa461c21b8cfa59eb907d27972415c99b +Subproject commit d9d1ea68fde4f0bf90caff21dc26044ace3114ae diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 66bee9c15..427de8251 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -23,6 +23,7 @@ POSTERIOR_FUNCTIONS, REGRESSOR_METRICS, _compute_null_distribution_coleman, + _non_nan_samples, train_tree, ) @@ -140,7 +141,7 @@ def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = if not isinstance(covariate_index, (list, tuple, np.ndarray)): raise RuntimeError("covariate_index must be an iterable of integer indices") else: - if not all(isinstance(idx, int) for idx in covariate_index): + if not all(isinstance(idx, (np.integer, int)) for idx in covariate_index): raise RuntimeError("Not all covariate_index are integer indices") if self._n_samples_ is not None and X.shape[0] != self._n_samples_: @@ -362,7 +363,7 @@ def test( # the posteriors and computing the test statistic on the resampled posteriors if self.sample_dataset_per_tree: metric_star, metric_star_pi = _compute_null_distribution_coleman( - y_test=y[observe_samples, :], + y_test=y, y_pred_proba_normal=observe_posteriors, y_pred_proba_perm=permute_posteriors, metric=metric, @@ -375,10 +376,13 @@ def test( # there is only one train and test split, so we can just use that _, 
indices_test = self.train_test_samples_[0] y_test = y[indices_test, :] + y_pred_proba_normal = observe_posteriors[:, indices_test, :] + y_pred_proba_perm = permute_posteriors[:, indices_test, :] + metric_star, metric_star_pi = _compute_null_distribution_coleman( y_test=y_test, - y_pred_proba_normal=observe_posteriors, - y_pred_proba_perm=permute_posteriors, + y_pred_proba_normal=y_pred_proba_normal, + y_pred_proba_perm=y_pred_proba_perm, metric=metric, n_repeats=n_repeats, seed=self.random_state, @@ -588,19 +592,15 @@ def _statistic( samples = indices_test y_true_final = y_test - # determine if there are any nans in the final posterior array - temp_posterior_forest = np.nanmean(posterior_arr, axis=0) - nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] - - # Find the row indices with NaN values in any column - samples = nonnan_indices + # determine if there are any nans in the final posterior array, when + # averaged over the trees + samples = _non_nan_samples(posterior_arr) # Ignore all NaN values (samples not tested) - y_true_final = y[(nonnan_indices), :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] + y_true_final = y[(samples), :] # Average all posteriors (n_samples_test, n_outputs) to compute the statistic - posterior_forest = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr[:, (samples), :], axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: # Ignore all NaN values (samples not tested) -> (n_samples_final, n_outputs) @@ -750,6 +750,7 @@ def _statistic( if predict_posteriors: # now initialize posterior array as (n_trees, n_samples_test, n_classes) + # XXX: currently assumes n_outputs_ == 1 posterior_arr = np.full( (self.n_estimators, self._n_samples_, estimator.n_classes_), np.nan ) @@ -818,22 +819,15 @@ def _statistic( f"AUC metric is not supported for {self._type_of_target_} targets." 
) - # determine if there are any nans in the final posterior array - print(posterior_arr.shape) - print(posterior_arr) - print(np.nanmean(posterior_arr, axis=0).shape) - temp_posterior_forest = np.nanmean(posterior_arr, axis=0) - nonnan_indices = np.where(~np.isnan(temp_posterior_forest).any(axis=1))[0] - - # Find the row indices with NaN values in any column - samples = nonnan_indices + # determine if there are any nans in the final posterior array, when + # averaged over the trees + samples = _non_nan_samples(posterior_arr) # Ignore all NaN values (samples not tested) - y_true_final = y[(nonnan_indices), :] - posterior_arr = posterior_arr[:, (nonnan_indices), :] + y_true_final = y[(samples), :] # Average all posteriors (n_samples_test, n_outputs) to compute the statistic - posterior_forest = np.nanmean(posterior_arr, axis=0) + posterior_forest = np.nanmean(posterior_arr[:, (samples), :], axis=0) stat = metric_func(y_true_final, posterior_forest, **metric_kwargs) if covariate_index is None: @@ -849,51 +843,3 @@ def _statistic( return stat, posterior_arr, samples return stat - - def statistic( - self, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, - metric="mi", - return_posteriors: bool = False, - check_input: bool = True, - **metric_kwargs, - ): - """Compute the test statistic. - - Parameters - ---------- - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The target matrix. - covariate_index : ArrayLike, optional of shape (n_covariates,) - The index array of covariates to shuffle, by default None. - metric : str, optional - The metric to compute, by default "mi", which computes Mutual Information. - return_posteriors : bool, optional - Whether or not to return the posteriors, by default False. - check_input : bool, optional - Whether or not to check the input, by default True. - **metric_kwargs : dict, optional - Additional keyword arguments to pass to the metric function. 
- - Returns - ------- - stat : float - The test statistic. - posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final), optional - If ``return_posteriors`` is True, then the posterior probabilities of the - samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` - if all samples are encountered in the test set of at least one tree in the - posterior computation. - samples : ArrayLike of shape (n_samples_final,), optional - The indices of the samples used in the final test. ``n_samples_final`` is - equal to ``n_samples`` if all samples are encountered in the test set of at - least one tree in the posterior computation. - """ - return super().statistic( - X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs - ) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 9b849ed22..20fc68e52 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -1,7 +1,11 @@ +import pickle +from pathlib import Path + import numpy as np import pytest from flaky import flaky from joblib import Parallel, delayed +from numpy.testing import assert_array_equal from scipy.special import expit from sklearn import datasets @@ -124,8 +128,8 @@ def test_featureimportance_forest_errors(): "sample_dataset_per_tree": True, }, 300, # n_samples - 500, # n_repeats - 0.1, # test_size + 1000, # n_repeats + 0.2, # test_size ], ], ) @@ -205,12 +209,12 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) n_jobs=-1, ), "random_state": seed, - "permute_per_tree": True, - "sample_dataset_per_tree": True, + "permute_per_tree": False, + "sample_dataset_per_tree": False, }, - 600, - 200, - 1.0 / 6, + 600, # n_samples + 1000, # n_repeats + 1.0 / 6, # test_size ], ], ) @@ -266,7 +270,6 @@ def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, @flaky(max_runs=2) 
-@pytest.mark.slowtest @pytest.mark.parametrize("criterion", ["gini", "entropy"]) @pytest.mark.parametrize("honest_prior", ["empirical", "uniform"]) @pytest.mark.parametrize( @@ -408,7 +411,54 @@ def run_forest(covariate_index=None): pvalue = clf.test(X, y, covariate_index=[covariate_index], metric="mi") return pvalue - out = Parallel(n_jobs=1, backend=backend)( + out = Parallel(n_jobs=-1, backend=backend)( delayed(run_forest)(covariate_index) for covariate_index in range(n_features) ) assert len(out) == n_features + + +def test_pickle(tmpdir): + """Test that pickling works and preserves fitted attributes.""" + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + n_repeats = 1000 + + clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.2 + ), + test_size=0.5, + ) + stat, pvalue = clf.test(X, y, covariate_index=[1], metric="mi", n_repeats=n_repeats) + + with open(Path(tmpdir) / "clf.pkl", "wb") as fpath: + pickle.dump(clf, fpath) + + with open(Path(tmpdir) / "clf.pkl", "rb") as fpath: + clf_pickle = pickle.load(fpath) + + # recompute pvalue manually and compare + pickle_pvalue = ( + 1.0 + (clf_pickle.null_dist_ <= (clf_pickle.permute_stat_ - clf_pickle.observe_stat_)).sum() + ) / (1.0 + n_repeats) + assert pvalue == pickle_pvalue + assert clf_pickle.permute_stat_ - clf_pickle.observe_stat_ == stat + + attr_list = [ + "test_size", + "observe_samples_", + "y_true_final_", + "observe_posteriors_", + "observe_stat_", + "_is_fitted", + "permute_samples_", + "permute_posteriors_", + "permute_stat_", + "n_samples_test_", + "_n_samples_", + "_metric", + ] + for attr in attr_list: + assert_array_equal(getattr(clf, attr), getattr(clf_pickle, attr)) diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index 30ce83ff8..d2f36c1e4 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py 
@@ -16,12 +16,12 @@ def _mutual_information(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: - """Compute estimate of mutual information. + """Compute estimate of mutual information for supervised classification setting. Parameters ---------- y_true : ArrayLike of shape (n_samples,) - _description_ + The true labels. y_pred_proba : ArrayLike of shape (n_samples, n_outputs) Posterior probabilities. @@ -41,21 +41,69 @@ def _mutual_information(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: return H_Y - H_YX +def _cond_entropy(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: + """Compute estimate of conditional entropy for supervised classification setting. + + H(Y | X) + + Parameters + ---------- + y_true : ArrayLike of shape (n_samples,) + The true labels. Not used in computation of the entropy. + y_pred_proba : ArrayLike of shape (n_samples, n_outputs) + Posterior probabilities. + + Returns + ------- + float : + The estimated conditional entropy H(Y | X). + """ + if y_true.squeeze().ndim != 1: + raise ValueError(f"y_true must be 1d, not {y_true.shape}") + + # entropy averaged over n_samples + H_YX = np.mean(entropy(y_pred_proba, base=np.exp(1), axis=1)) + return H_YX + + METRIC_FUNCTIONS = { "mse": mean_squared_error, "mae": mean_absolute_error, "balanced_accuracy": balanced_accuracy_score, "auc": roc_auc_score, "mi": _mutual_information, + "cond_entropy": _cond_entropy, } -POSTERIOR_FUNCTIONS = ("mi", "auc") +POSTERIOR_FUNCTIONS = ("mi", "auc", "cond_entropy") POSITIVE_METRICS = ("mi", "auc", "balanced_accuracy") REGRESSOR_METRICS = ("mse", "mae") +def _non_nan_samples(posterior_arr: ArrayLike) -> ArrayLike: + """Determine which samples are not nan in the posterior tree array. + + Parameters + ---------- + posterior_arr : ArrayLike of shape (n_trees, n_samples, n_outputs) + The 3D posterior array from the forest. + + Returns + ------- + nonnan_indices : ArrayLike of shape (n_nonnan_samples,) + The indices of the samples that are not nan in the posterior array + along axis=1.
+ """ + # Find the row indices with NaN values along the specified axis + nan_indices = np.isnan(posterior_arr).any(axis=2).all(axis=0) + + # Invert the boolean mask to get indices without NaN values + nonnan_indices = np.where(~nan_indices)[0] + return nonnan_indices + + def train_tree( tree: DecisionTreeClassifier, X: ArrayLike, @@ -218,16 +266,30 @@ def _compute_null_distribution_coleman( rng.shuffle(y_pred_ind_arr) # get random half of the posteriors from two sets of trees - first_forest_inds = y_pred_ind_arr[:n_samples_test] - second_forest_inds = y_pred_ind_arr[:n_samples_test] + first_forest_inds = y_pred_ind_arr[: n_estimators // 2] + second_forest_inds = y_pred_ind_arr[n_estimators // 2 :] + + # get random half of the posteriors as one forest + first_forest_pred = all_y_pred[first_forest_inds, ...] + second_forest_pred = all_y_pred[second_forest_inds, ...] + + # determine if there are any nans in the final posterior array, when + # averaged over the trees + first_forest_samples = _non_nan_samples(first_forest_pred) + second_forest_samples = _non_nan_samples(second_forest_pred) + + # todo: is this step necessary? 
+ non_nan_samples = np.intersect1d( + first_forest_samples, second_forest_samples, assume_unique=True + ) - # get random half of the posteriors - y_pred_first_half = np.nanmean(all_y_pred[first_forest_inds], axis=0) - y_pred_second_half = np.nanmean(all_y_pred[second_forest_inds], axis=0) + # now average the posteriors over the trees for the non-nan samples + y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0) + y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0) # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_test, y_pred_first_half) - second_half_metric = metric_func(y_test, y_pred_second_half) + first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half) + second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric From 3a4a4b4fe0524e8021839b3a7d2636c35868179d Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 4 Oct 2023 11:33:19 -0400 Subject: [PATCH 64/70] Add parallelization to the tree building and predicting posteriors Signed-off-by: Adam Li --- ...mpare_coleman_and_permutation_forest.ipynb | 412 +++++++++++++----- ...t_MI_gigantic_hypothesis_testing_forest.py | 5 +- sktree/stats/forestht.py | 89 ++-- sktree/stats/tests/test_forestht.py | 2 +- 4 files changed, 381 insertions(+), 127 deletions(-) diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index 3990764bf..f084e213b 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -239,112 +239,196 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "id": "14806903-933b-4e31-a2db-a3a45e0a6f82", "metadata": { "scrolled": true }, "outputs": [ - 
{ - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n", - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X1: 0.9940119760479041\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.36726546906187624\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 0.23952095808383234\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X2/7: 0.49101796407185627\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n", - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "X1: 0.00998003992015968\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X6: 0.3532934131736527\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/sklearn/base.py:1151: DataConversionWarning: A column-vector y was passed when a 1d array was expected. 
Please change the shape of y to (n_samples,), for example using ravel().\n", - " return fit_method(estimator, *args, **kwargs)\n" + "X1: 0.16167664670658682\n", + "X6: 0.017964071856287425\n", + "X2/7: 0.313373253493014\n", + "X2/7: 0.043912175648702596\n", + "X1: 0.24550898203592814\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.8962075848303394\n", + "X2/7: 0.001996007984031936\n", + "X1: 0.9700598802395209\n", + "X6: 0.13572854291417166\n", + "X2/7: 0.9181636726546906\n", + "X2/7: 0.07584830339321358\n", + "X1: 0.9640718562874252\n", + "X6: 0.00998003992015968\n", + "X2/7: 0.9940119760479041\n", + "X2/7: 0.03792415169660679\n", + "X1: 1.0\n", + "X6: 0.11776447105788423\n", + "X2/7: 0.6347305389221557\n", + "X2/7: 0.0718562874251497\n", + "X1: 1.0\n", + "X6: 0.007984031936127744\n", + "X2/7: 0.9960079840319361\n", + "X2/7: 0.14770459081836326\n", + "X1: 0.9161676646706587\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9001996007984032\n", + "X2/7: 0.011976047904191617\n", + "X1: 0.9940119760479041\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.11976047904191617\n", + "X1: 0.874251497005988\n", + "X6: 0.11976047904191617\n", + "X2/7: 1.0\n", + "X2/7: 0.003992015968063872\n", + "X1: 1.0\n", + "X6: 0.005988023952095809\n", + "X2/7: 0.998003992015968\n", + "X2/7: 0.29740518962075846\n", + "X1: 0.936127744510978\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9001996007984032\n", + "X2/7: 0.00998003992015968\n", + "X1: 0.7265469061876247\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9940119760479041\n", + "X2/7: 0.07584830339321358\n", + "X1: 0.9441117764471058\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.998003992015968\n", + "X2/7: 0.19560878243512975\n", + "X1: 0.9660678642714571\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.07584830339321358\n", + "X1: 0.3912175648702595\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.7724550898203593\n", + "X2/7: 0.015968063872255488\n", + "X1: 0.013972055888223553\n", + "X6: 
0.001996007984031936\n", + "X2/7: 0.998003992015968\n", + "X2/7: 0.059880239520958084\n", + "X1: 0.3073852295409182\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.18962075848303392\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.281437125748503\n", + "X2/7: 0.03592814371257485\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9880239520958084\n", + "X2/7: 0.05788423153692615\n", + "X1: 0.6347305389221557\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9940119760479041\n", + "X2/7: 0.06187624750499002\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.02594810379241517\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.17165668662674652\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9880239520958084\n", + "X2/7: 0.01996007984031936\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.8842315369261478\n", + "X2/7: 0.033932135728542916\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9900199600798403\n", + "X2/7: 0.10578842315369262\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.5209580838323353\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.8982035928143712\n", + "X2/7: 0.03992015968063872\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9960079840319361\n", + "X2/7: 0.005988023952095809\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.4091816367265469\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9960079840319361\n", + "X2/7: 0.4311377245508982\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.998003992015968\n", + "X2/7: 0.0998003992015968\n", + "X1: 0.001996007984031936\n", + "X6: 
0.001996007984031936\n", + "X2/7: 0.9181636726546906\n", + "X2/7: 0.06986027944111776\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.14570858283433133\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.09780439121756487\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9660678642714571\n", + "X2/7: 0.059880239520958084\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9900199600798403\n", + "X2/7: 0.05588822355289421\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.09381237524950099\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.45708582834331335\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9900199600798403\n", + "X2/7: 0.20159680638722555\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.810379241516966\n", + "X2/7: 0.1656686626746507\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.20958083832335328\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.9960079840319361\n", + "X2/7: 0.033932135728542916\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.998003992015968\n", + "X2/7: 0.12375249500998003\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 0.998003992015968\n", + "X2/7: 0.19560878243512975\n", + "X1: 0.001996007984031936\n", + "X6: 0.001996007984031936\n", + "X2/7: 1.0\n", + "X2/7: 0.05588822355289421\n" ] } ], @@ -361,8 +445,8 @@ " elements_dict = linear_model_ancova(\n", " sigma_factor,\n", " new_seed,\n", - " permute_per_tree=False,\n", - " sample_dataset_per_tree=False,\n", + " permute_per_tree=True,\n", + " sample_dataset_per_tree=True,\n", " )\n", " for key, value in elements_dict.items():\n", " 
pvalue_dict[key].append(value)\n", @@ -371,6 +455,60 @@ "df = pd.DataFrame(pvalue_dict)" ] }, + { + "cell_type": "code", + "execution_count": 24, + "id": "48b832dd-aa16-4f11-94aa-8e4bdff5c957", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[autoreload of sktree.stats.forestht failed: Traceback (most recent call last):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", + " superreload(m, reload, self.old_objects)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 500, in superreload\n", + " update_generic(old_obj, new_obj)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 349, in update_class\n", + " if update_generic(old_obj, new_obj):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 309, in update_function\n", + " setattr(old, name, getattr(new, name))\n", + "ValueError: statistic() requires a code object with 1 free vars, not 0\n", + "]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Permutation per tree and sample dataset per tree)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, { "cell_type": "code", "execution_count": 19, @@ -425,6 +563,82 @@ "fig.tight_layout()" ] }, + { + "cell_type": "code", + "execution_count": 21, + "id": "631649cc-f99f-4ce2-9b42-a4a7d75bd5b1", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[autoreload of sktree.stats.forestht failed: Traceback (most recent call last):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 276, in check\n", + " superreload(m, reload, self.old_objects)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 500, in superreload\n", + " update_generic(old_obj, new_obj)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 349, in update_class\n", + " if update_generic(old_obj, new_obj):\n", + " File \"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 397, in update_generic\n", + " update(a, b)\n", + " File 
\"/Users/adam2392/miniforge3/envs/sktree/lib/python3.9/site-packages/IPython/extensions/autoreload.py\", line 309, in update_function\n", + " setattr(old, name, getattr(new, name))\n", + "ValueError: statistic() requires a code object with 1 free vars, not 0\n", + "]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with FeatureImportanceRegressor (Original Coleman method)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "633dda38-68e6-4101-8657-86b75721f92a", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axs = plt.subplots(2, 2, figsize=(8, 6), sharey=True, sharex=True)\n", + "axs = axs.flatten()\n", + "\n", + "for ax, name in zip(axs, [\"X1\", \"X2\", \"X6\", \"X7\"]):\n", + " sns.lineplot(data=df, x=\"sigma_factor\", y=name, ax=ax, marker=\"o\")\n", + "\n", + " ax.axhline([0.05], ls=\"--\", color=\"red\", label=\"alpha\")\n", + " ax.set(title=name, ylabel=\"pvalue\", xlabel=\"SNR (10 / x)\")\n", + " ax.legend()\n", + "fig.suptitle(\n", + " \"Linear ANCOVA model with Coleman Forest (Permute per tree, but not sample separate dataset)\"\n", + ")\n", + "fig.tight_layout()" + ] + }, { "cell_type": "code", "execution_count": 15, diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index fd4f84765..034e5ebec 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -17,8 +17,9 @@ feature set (W) is weakly correlated with the target (y). 
Here, we are testing the null hypothesis: -``H0: I(X; y) - I(X, W; y) = 0`` -``HA: I(X; y) - I(X, W; y) > 0`` +- ``H0: I(X; y) - I(X, W; y) = 0`` +- ``HA: I(X; y) - I(X, W; y) < 0``, indicating that ``(X, W)`` shares more mutual + information with ``y`` than ``X`` alone, where ``I`` denotes mutual information. For example, this could be true in the following settings, where X is our informative feature set and W is our uninformative feature set. diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 427de8251..b63cbd894 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1,6 +1,7 @@ -from typing import Callable, Tuple +from typing import Callable, Tuple, Union import numpy as np +from joblib import Parallel, delayed from numpy.typing import ArrayLike from sklearn.base import MetaEstimatorMixin, clone, is_classifier from sklearn.ensemble._forest import ForestClassifier as sklearnForestClassifier @@ -15,7 +16,7 @@ RandomForestClassifier, RandomForestRegressor, ) -from sktree._lib.sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sktree.tree import DecisionTreeClassifier, DecisionTreeRegressor from .utils import ( METRIC_FUNCTIONS, @@ -28,6 +29,35 @@ ) +def _parallel_build_trees_and_compute_posteriors( + trees, + idx: int, + indices_train: ArrayLike, + indices_test: ArrayLike, + X: ArrayLike, + y: ArrayLike, + covariate_index, + posterior_arr: ArrayLike, + predict_posteriors: bool, +): + """Parallel function to build trees and compute posteriors. + + This inherently assumes that the caller function defines the indices + for the training and testing data for each tree.
+ """ + tree: Union[DecisionTreeClassifier, DecisionTreeRegressor] = trees[idx] + train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) + + if predict_posteriors: + # XXX: currently assumes n_outputs_ == 1 + y_pred = tree.predict_proba(X[indices_test, :]).reshape(-1, tree.n_classes_) + else: + y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) + + # Fill test set posteriors & set rest NaN + posterior_arr[idx, indices_test, :] = y_pred # posterior + + class BaseForestHT(MetaEstimatorMixin): observe_samples_: ArrayLike observe_posteriors_: ArrayLike @@ -471,9 +501,11 @@ class FeatureImportanceForestRegressor(BaseForestHT): y_true_final_ : ArrayLike of shape (n_samples_final,) The true labels of the samples used in the final test. - observe_posteriors_ : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or - (n_estimators, n_samples_final, n_classes) + observe_posteriors_ : ArrayLike of shape (n_estimators, n_samples, n_outputs) or + (n_estimators, n_samples, n_classes) The predicted posterior probabilities of the samples used in the final test. + For samples that are NaNs for all estimators, means the sample was not used + in the test set at all across all trees. null_dist_ : ArrayLike of shape (n_repeats,) The null distribution of the test statistic. 
@@ -551,15 +583,20 @@ def _statistic( posterior_arr = np.full((self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan) if self.permute_per_tree: - # now initialize posterior array as (n_trees, n_samples_test, n_outputs) - for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): - tree: DecisionTreeRegressor = estimator.estimators_[idx] - train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) - - y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) - - # Fill test set posteriors & set rest NaN - posterior_arr[idx, indices_test, :] = y_pred # posterior + Parallel(n_jobs=estimator.n_jobs, verbose=self.verbose, prefer="threads")( + delayed(_parallel_build_trees_and_compute_posteriors)( + estimator.estimators_, + idx, + indices_train, + indices_test, + X, + y, + covariate_index, + posterior_arr, + False, + ) + for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()) + ) else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] @@ -760,18 +797,20 @@ def _statistic( (self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan ) if self.permute_per_tree: - for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()): - tree: DecisionTreeClassifier = estimator.estimators_[idx] - train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) - - if predict_posteriors: - # XXX: currently assumes n_outputs_ == 1 - y_pred = tree.predict_proba(X[indices_test, :]).reshape(-1, tree.n_classes_) - else: - y_pred = tree.predict(X[indices_test, :]).reshape(-1, tree.n_outputs_) - - # Fill test set posteriors & set rest NaN - posterior_arr[idx, indices_test, :] = y_pred # posterior + Parallel(n_jobs=estimator.n_jobs, verbose=self.verbose, prefer="threads")( + delayed(_parallel_build_trees_and_compute_posteriors)( + estimator.estimators_, + idx, + indices_train, + 
indices_test, + X, + y, + covariate_index, + posterior_arr, + predict_posteriors, + ) + for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()) + ) else: # fitting a forest will only get one unique train/test split indices_train, indices_test = self.train_test_samples_[0] diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 20fc68e52..628644223 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -319,7 +319,7 @@ def test_iris_pauc_statistic( clf.reset() if sample_dataset_per_tree and not permute_per_tree: # test in another test - pytest.skip() + return stat, pvalue = clf.test( X, From e91060f4042c1bbe7d8c0582bf282eb713efb695 Mon Sep 17 00:00:00 2001 From: Haoyin Xu Date: Wed, 4 Oct 2023 14:43:21 -0400 Subject: [PATCH 65/70] ENH add MIGHT example notebook on AUC --- .../notebooks/might_example_auc.ipynb | 252 ++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 benchmarks_nonasv/notebooks/might_example_auc.ipynb diff --git a/benchmarks_nonasv/notebooks/might_example_auc.ipynb b/benchmarks_nonasv/notebooks/might_example_auc.ipynb new file mode 100644 index 000000000..f844e5185 --- /dev/null +++ b/benchmarks_nonasv/notebooks/might_example_auc.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "711de268", + "metadata": {}, + "source": [ + "## Mutual Information for Genuine Hypothesis Testing (MIGHT)" + ] + }, + { + "cell_type": "markdown", + "id": "f7bc7387", + "metadata": {}, + "source": [ + "An example using `sktree.stats.FeatureImportanceForestClassifier` for nonparametric multivariate hypothesis test, on simulated datasets. Here, we present a simulation of how MIGHT is used to evaluate how a \"feature set is important for predicting the target\".\n", + "\n", + "We simulate a dataset with 1000 features, 500 samples, and a binary class target variable. 
The features are split evenly: 500 features belong to one feature set, and the other 500 features belong to a second feature set. One could think of these, for example, as different datasets collected on the same patient in a biomedical setting.\n", + "The first feature set (X) is strongly correlated with the target, and the second feature set (W) is weakly correlated with the target (y).\n", + "\n", + "We then use MIGHT to calculate the partial AUC of these sets." + ] + }, + { + "cell_type": "markdown", + "id": "55286132", + "metadata": {}, + "source": [ + "### Installation (WIP)\n", + "\n", + "```\n", + "cd scikit-tree/\n", + "git checkout might\n", + "git pull\n", + "pip install -e .\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "07959f0d", + "metadata": {}, + "source": [ + "### Import dependencies & set random seed" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "916af34d", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from scipy.special import expit\n", + "\n", + "from sktree import HonestForestClassifier\n", + "from sktree.stats import FeatureImportanceForestClassifier\n", + "from sktree.tree import DecisionTreeClassifier\n", + "\n", + "seed = 12345\n", + "rng = np.random.default_rng(seed)" + ] + }, + { + "cell_type": "markdown", + "id": "9ccfa9ac", + "metadata": {}, + "source": [ + "### Simulate data\n", + "\n", + "We simulate the two feature sets and the target variable.
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c479d14e", + "metadata": {}, + "outputs": [], + "source": [ + "n_samples = 1000\n", + "n_features_set = 500\n", + "mean = 1.0\n", + "sigma = 2.0\n", + "beta = 5.0\n", + "\n", + "unimportant_mean = 0.0\n", + "unimportant_sigma = 4.5\n", + "\n", + "# first sample the informative features, and then the uniformative features\n", + "X_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10))\n", + "X_important = np.hstack(\n", + " [\n", + " X_important,\n", + " rng.normal(\n", + " loc=unimportant_mean,\n", + " scale=unimportant_sigma,\n", + " size=(n_samples, n_features_set - 10),\n", + " ),\n", + " ]\n", + ")\n", + "\n", + "X_unimportant = rng.normal(\n", + " loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set)\n", + ")\n", + "X = np.hstack([X_important, X_unimportant])\n", + "\n", + "# simulate the binary target variable\n", + "y = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples)" + ] + }, + { + "cell_type": "markdown", + "id": "7afbe135", + "metadata": {}, + "source": [ + "### Use partial AUC as test statistic\n", + "\n", + "You can specify the max FPR in `statistic`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a3b78a92", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASH-90 / Partial AUC: 0.6049643724062328\n", + "Shape of Observed Samples: (1000,)\n", + "Shape of Tree Posteriors for the positive class: (125, 1000, 1)\n" + ] + } + ], + "source": [ + "# parameters that could be changed\n", + "n_estimators = 125\n", + "max_features = \"sqrt\"\n", + "metric = \"auc\"\n", + "test_size = 0.2\n", + "n_jobs = -1\n", + "honest_fraction = 0.7\n", + "max_fpr = 0.1\n", + "\n", + "est = FeatureImportanceForestClassifier(\n", + " estimator=HonestForestClassifier(\n", + " n_estimators=n_estimators,\n", + " max_features=max_features,\n", + " tree_estimator=DecisionTreeClassifier(),\n", + " random_state=seed,\n", + " honest_fraction=honest_fraction,\n", + " n_jobs=n_jobs,\n", + " ),\n", + " random_state=seed,\n", + " test_size=test_size,\n", + " permute_per_tree=True,\n", + " sample_dataset_per_tree=True,\n", + ")\n", + "\n", + "# we test for the first feature set, which is important and thus should return a higher AUC\n", + "stat, posterior_arr, samples = est.statistic(\n", + " X_important,\n", + " y,\n", + " metric=metric,\n", + " return_posteriors=True,\n", + ")\n", + "\n", + "print(\"ASH-90 / Partial AUC:\", stat)\n", + "print(\"Shape of Observed Samples:\", samples.shape)\n", + "print(\"Shape of Tree Posteriors for the positive class:\", posterior_arr.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "b41d2a8e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASH-90 / Partial AUC: 0.4944796805261922\n", + "Shape of Observed Samples: (1000,)\n", + "Shape of Tree Posteriors for the positive class: (125, 1000, 1)\n" + ] + } + ], + "source": [ + "# Repeat for the second feature set\n", + "stat, posterior_arr, samples = est.statistic(\n", + " X_unimportant,\n", + " y,\n", + " 
metric=metric,\n", + " return_posteriors=True,\n", + ")\n", + "\n", + "print(\"ASH-90 / Partial AUC:\", stat)\n", + "print(\"Shape of Observed Samples:\", samples.shape)\n", + "print(\"Shape of Tree Posteriors for the positive class:\", posterior_arr.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "02e69f1c", + "metadata": {}, + "source": [ + "### All posteriors are saved within the model\n", + "\n", + "Extract the results from the model variables anytime. You can save the model with `pickle`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8d8f2f", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"ASH-90 / Partial AUC:\", est.observe_stat_)\n", + "print(\"Observed Samples:\", est.observe_samples_)\n", + "print(\"Tree Posteriors for the positive class:\", est.observe_posteriors_) # (n_trees, n_samples_test, 1)\n", + "print(\"True Labels:\", est.y_true_final_)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From efbd440ef77c676367102a25d15d483d191d872c Mon Sep 17 00:00:00 2001 From: Adam Li Date: Wed, 4 Oct 2023 15:33:14 -0400 Subject: [PATCH 66/70] Consolidate parallleization Signed-off-by: Adam Li --- README.md | 9 +- ...mpare_coleman_and_permutation_forest.ipynb | 6 +- .../forest_ht_independent_data.ipynb | 143 ++++++++++++++++++ ...t_MI_gigantic_hypothesis_testing_forest.py | 6 +- sktree/stats/forestht.py | 96 ++++++++++-- sktree/stats/tests/test_forestht.py | 73 ++++++++- sktree/stats/utils.py | 57 ++----- 7 files changed, 318 insertions(+), 72 deletions(-) create mode 100644 
benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb diff --git a/README.md b/README.md index 4da75fef1..12076150c 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,6 @@ Installation ============ Our installation will try to follow scikit-learn installation as close as possible, as we contain Cython code subclassed, or inspired by the scikit-learn tree submodule. -AS OF NOW, scikit-tree is in development stage and the installation is still finicky due to the upstream scikit-learn's stalled refactoring PRs of the tree submodule. Once those are merged, the installation will be simpler. The current recommended installation is done locally with meson. - Dependencies ------------ @@ -40,11 +38,12 @@ We minimally require: Installation with Pip (https://pypi.org/project/scikit-tree/) ------------------------------------------------------------- +Installing with pip on a conda environment is the recommended route. pip install scikit-tree -Building locally with Meson (RECOMMENDED) ------------------------------------------ +Building locally with Meson (For developers) +-------------------------------------------- Make sure you have the necessary packages installed # install build dependencies @@ -96,7 +95,7 @@ You can also do the same thing using Meson/Ninja itself. Run the following to bu python -c "from sktree import tree" python -c "import sklearn; print(sklearn.__version__);" -Alternatively, you can use editable installs +After building locally, you can use editable installs (warning: this only registers Python changes locally) pip install --no-build-isolation --editable . 
diff --git a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb index f084e213b..cd38e171f 100644 --- a/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb +++ b/benchmarks_nonasv/notebooks/compare_coleman_and_permutation_forest.ipynb @@ -1488,11 +1488,9 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "0f1ea3aa-ebad-4ff5-ae7b-abe3aa8308b6", + "cell_type": "markdown", + "id": "1c50ea7c-fd59-46c1-9408-ac119592f855", "metadata": {}, - "outputs": [], "source": [] } ], diff --git a/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb b/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb new file mode 100644 index 000000000..b31e72003 --- /dev/null +++ b/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a8602dd6-12b9-4ebf-a217-4cddca954cea", + "metadata": {}, + "source": [ + "# Test on Independent High-dimensional Data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ad09a023-04db-4748-bfe8-1ec34bc83061", + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from scipy.special import expit\n", + "\n", + "from sktree import RandomForestClassifier, RandomForestRegressor\n", + "from sktree.stats import (\n", + " FeatureImportanceForestClassifier,\n", + " FeatureImportanceForestRegressor,\n", + " PermutationForestRegressor,\n", + ")\n", + "\n", + "seed = 12345" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91b2f4e9-b4df-4ff0-9e99-dc9a68ebc62c", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": 
"fa105af9-f0b6-4fbe-9d91-8b2a5493593a", + "metadata": {}, + "outputs": [], + "source": [ + "n_samples = 600\n", + "n_features = 500\n", + "\n", + "n_estimators = 125\n", + "n_jobs = -1\n", + "max_features = \"sqrt\"\n", + "test_size = 1.0 / 6\n", + "metric = \"mse\"\n", + "n_repeats = 500\n", + "permute_per_tree = True\n", + "sample_dataset_per_tree = True\n", + "\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "# initialize hypothesis tester\n", + "est = FeatureImportanceForestClassifier(\n", + " RandomForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " ),\n", + " random_state=seed,\n", + " test_size=test_size,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "be324da0-524f-4eaa-9fa1-dbad27058dbc", + "metadata": {}, + "source": [ + "# Run Pvalue Computation Over Many Instances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b53a9ec-befb-471a-9a44-681bbd86b31b", + "metadata": {}, + "outputs": [], + "source": [ + "stats = []\n", + "pvalues = []\n", + "\n", + "for idx in range(500):\n", + " X = rng.standard_normal(size=(n_samples, n_features))\n", + " y = rng.binomial(1, 0.5, size=n_samples) # .reshape(-1, 1)\n", + " stat, pvalue = est.test(X, y, covariate_index=[0], metric=\"mi\")\n", + " est.reset()\n", + " \n", + " stats.append(stat)\n", + " pvalues.append(pvalue)\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff616595-bad5-4845-94b9-713c11dc5745", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sktree", + "language": "python", + "name": "sktree" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.9.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/plot_MI_gigantic_hypothesis_testing_forest.py index 034e5ebec..423bc63dc 100644 --- a/examples/plot_MI_gigantic_hypothesis_testing_forest.py +++ b/examples/plot_MI_gigantic_hypothesis_testing_forest.py @@ -108,10 +108,14 @@ ), random_state=seed, test_size=test_size, - permute_per_tree=False, + permute_per_tree=True, sample_dataset_per_tree=False, ) +print( + f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: " + f"{est.sample_dataset_per_tree}" +) # we test for the first feature set, which is important and thus should return a pvalue < 0.05 stat, pvalue = est.test( X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index b63cbd894..c49f28275 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -11,10 +11,13 @@ from sklearn.utils.validation import _is_fitted, check_X_y from sktree._lib.sklearn.ensemble._forest import ( + BaseForest, ForestClassifier, ForestRegressor, RandomForestClassifier, RandomForestRegressor, + _get_n_samples_bootstrap, + _parallel_build_trees, ) from sktree.tree import DecisionTreeClassifier, DecisionTreeRegressor @@ -25,12 +28,11 @@ REGRESSOR_METRICS, _compute_null_distribution_coleman, _non_nan_samples, - train_tree, ) def _parallel_build_trees_and_compute_posteriors( - trees, + forest: BaseForest, idx: int, indices_train: ArrayLike, indices_test: ArrayLike, @@ -39,14 +41,56 @@ def _parallel_build_trees_and_compute_posteriors( covariate_index, posterior_arr: ArrayLike, predict_posteriors: bool, + permute_per_tree: bool, + sample_weight: ArrayLike = None, + class_weight=None, + missing_values_in_feature_mask=None, + classes=None, ): """Parallel function to build trees and compute posteriors. 
This inherently assumes that the caller function defines the indices for the training and testing data for each tree. """ - tree: Union[DecisionTreeClassifier, DecisionTreeRegressor] = trees[idx] - train_tree(tree, X[indices_train, :], y[indices_train, :], covariate_index) + tree: Union[DecisionTreeClassifier, DecisionTreeRegressor] = forest.estimators_[idx] + if permute_per_tree and covariate_index is not None: + random_state = tree.random_state + else: + random_state = forest.random_state + + X_train = X[indices_train, :] + y_train = y[indices_train, ...] + rng = np.random.default_rng(random_state) + + if forest.bootstrap: + n_samples_bootstrap = _get_n_samples_bootstrap( + n_samples=X_train.shape[0], max_samples=forest.max_samples + ) + else: + n_samples_bootstrap = None + + # individual tree permutation of y labels + if covariate_index is not None: + indices = np.arange(X_train.shape[0], dtype=int) + # perform permutation of covariates + index_arr = rng.choice(indices, size=(X_train.shape[0], 1), replace=False, shuffle=True) + perm_X_cov = X_train[index_arr, covariate_index] + X_train[:, covariate_index] = perm_X_cov + + tree = _parallel_build_trees( + tree, + forest.bootstrap, + X_train, + y_train, + sample_weight, + idx, + len(forest.estimators_), + verbose=0, + class_weight=class_weight, + n_samples_bootstrap=n_samples_bootstrap, + missing_values_in_feature_mask=missing_values_in_feature_mask, + classes=classes, + ) if predict_posteriors: # XXX: currently assumes n_outputs_ == 1 @@ -114,7 +158,7 @@ def _get_estimators_indices(self): indices = np.arange(self._n_samples_, dtype=int) # Get drawn indices along both sample and feature axes - if self.permute_per_tree and self.sample_dataset_per_tree: + if self.sample_dataset_per_tree: for tree in self.estimator_.estimators_: seed = tree.random_state @@ -127,7 +171,10 @@ def _get_estimators_indices(self): yield indices_train, indices_test else: indices_train, indices_test = train_test_split( - indices, 
test_size=self.test_size, shuffle=True, random_state=self.random_state + indices, + test_size=self.test_size, + shuffle=True, + random_state=self.estimator_.random_state, ) for tree in self.estimator_.estimators_: yield indices_train, indices_test @@ -145,6 +192,9 @@ def train_test_samples_(self): to reduce the object memory footprint by not storing the sampling data. Thus fetching the property may be slower than expected. """ + if self._n_samples_ is None: + raise RuntimeError("The estimator must be fitted before accessing this attribute.") + return [ (indices_train, indices_test) for indices_train, indices_test in self._get_estimators_indices() @@ -174,6 +224,12 @@ def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = if not all(isinstance(idx, (np.integer, int)) for idx in covariate_index): raise RuntimeError("Not all covariate_index are integer indices") + if self.test_size * X.shape[0] < 2: + raise RuntimeError( + f"There are less than 2 testing samples used with " + f"test_size={self.test_size} for X ({X.shape})." + ) + if self._n_samples_ is not None and X.shape[0] != self._n_samples_: raise RuntimeError( f"X must have {self._n_samples_} samples, got {X.shape[0]}. 
" @@ -244,8 +300,8 @@ def statistic( if self._type_of_target_ is None: self._type_of_target_ = type_of_target(y) - if self.sample_dataset_per_tree and not self.permute_per_tree: - raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True") + # if self.sample_dataset_per_tree and not self.permute_per_tree: + # raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True") if covariate_index is None: self.estimator_ = self._get_estimator() @@ -277,11 +333,11 @@ def statistic( else: estimator.fit(X[:2], y[:2]) - # permute per tree + # sampling a separate train/test per tree if self.sample_dataset_per_tree: self.n_samples_test_ = self._n_samples_ else: - # not permute per tree + # here we fix a training/testing dataset test_size_ = int(self.test_size * self._n_samples_) # Fit each tree and compute posteriors with train test splits @@ -316,7 +372,7 @@ def test( self, X, y, - covariate_index: ArrayLike = None, + covariate_index: ArrayLike, metric: str = "mi", n_repeats: int = 1000, return_posteriors: bool = True, @@ -582,10 +638,13 @@ def _statistic( rng = np.random.default_rng(self.random_state) posterior_arr = np.full((self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan) - if self.permute_per_tree: + + # both sampling dataset per tree or permuting per tree requires us to bypass the + # sklearn API to fit each tree individually + if self.sample_dataset_per_tree or self.permute_per_tree: Parallel(n_jobs=estimator.n_jobs, verbose=self.verbose, prefer="threads")( delayed(_parallel_build_trees_and_compute_posteriors)( - estimator.estimators_, + estimator, idx, indices_train, indices_test, @@ -594,6 +653,7 @@ def _statistic( covariate_index, posterior_arr, False, + self.permute_per_tree, ) for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()) ) @@ -778,7 +838,7 @@ def _statistic( ): """Helper function to compute the test statistic.""" metric_func: Callable[[ArrayLike, ArrayLike], 
float] = METRIC_FUNCTIONS[metric] - rng = np.random.default_rng(self.random_state) + rng = np.random.default_rng(estimator.random_state) if metric in POSTERIOR_FUNCTIONS: predict_posteriors = True @@ -796,10 +856,13 @@ def _statistic( posterior_arr = np.full( (self.n_estimators, self._n_samples_, estimator.n_outputs_), np.nan ) - if self.permute_per_tree: + + # both sampling dataset per tree or permuting per tree requires us to bypass the + # sklearn API to fit each tree individually + if self.sample_dataset_per_tree or self.permute_per_tree: Parallel(n_jobs=estimator.n_jobs, verbose=self.verbose, prefer="threads")( delayed(_parallel_build_trees_and_compute_posteriors)( - estimator.estimators_, + estimator, idx, indices_train, indices_test, @@ -808,6 +871,7 @@ def _statistic( covariate_index, posterior_arr, predict_posteriors, + self.permute_per_tree, ) for idx, (indices_train, indices_test) in enumerate(self._get_estimators_indices()) ) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 628644223..37a46173a 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -17,6 +17,7 @@ PermutationForestClassifier, PermutationForestRegressor, ) +from sktree.stats.utils import _non_nan_samples from sktree.tree import ObliqueDecisionTreeClassifier # load the iris dataset (n_samples, 4) @@ -82,8 +83,11 @@ def test_featureimportance_forest_errors(): permute_per_tree=permute_per_tree, sample_dataset_per_tree=sample_dataset_per_tree, ) - with pytest.raises(ValueError, match="sample_dataset_per_tree"): - est.statistic(iris_X[:10], iris_y[:10]) + with pytest.raises(RuntimeError, match="The estimator must be fitted"): + est.train_test_samples_ + + with pytest.raises(RuntimeError, match="There are less than 2 testing samples"): + est.statistic(iris_X[:5], iris_y[:5]) est = FeatureImportanceForestClassifier(estimator=RandomForestRegressor) with pytest.raises(RuntimeError, match="Estimator must be"): @@ 
-200,6 +204,7 @@ def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size) 1.0 / 6, ], [ + # XXX: Currently does not work with permute and sample dataset per tree FeatureImportanceForestClassifier, { "estimator": RandomForestClassifier( @@ -306,7 +311,7 @@ def test_iris_pauc_statistic( tree_estimator=estimator, honest_prior=honest_prior, random_state=0, - n_jobs=-1, + n_jobs=1, ), test_size=test_size, sample_dataset_per_tree=sample_dataset_per_tree, @@ -462,3 +467,65 @@ def test_pickle(tmpdir): ] for attr in attr_list: assert_array_equal(getattr(clf, attr), getattr(clf_pickle, attr)) + + +@pytest.mark.parametrize("permute_per_tree", [True, False]) +@pytest.mark.parametrize("sample_dataset_per_tree", [True, False]) +def test_sample_size_consistency_of_estimator_indices_(permute_per_tree, sample_dataset_per_tree): + """Test that the test-sample indices are what is expected.""" + clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.2 + ), + test_size=0.5, + permute_per_tree=permute_per_tree, + sample_dataset_per_tree=sample_dataset_per_tree, + ) + + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + + _, posteriors, samples = clf.statistic( + X, y, covariate_index=None, return_posteriors=True, metric="mi" + ) + if sample_dataset_per_tree: + assert_array_equal( + samples, + sorted(np.unique(np.concatenate([x[1] for x in clf.train_test_samples_]).flatten())), + ) + else: + assert_array_equal(samples, sorted(clf.train_test_samples_[0][1])) + assert len(_non_nan_samples(posteriors)) == len(samples) + + +def test_permute_per_tree_samples_consistency_with_sklearnforest(): + n_samples = 100 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + + clf = FeatureImportanceForestClassifier( + 
estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.2 + ), + test_size=0.5, + permute_per_tree=True, + sample_dataset_per_tree=False, + ) + + other_clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.2 + ), + test_size=0.5, + permute_per_tree=False, + sample_dataset_per_tree=False, + ) + + clf.statistic(X, y, covariate_index=None, metric="mi") + other_clf.statistic(X, y, covariate_index=None, metric="mi") + + assert_array_equal(clf.train_test_samples_[0][0], other_clf.train_test_samples_[0][0]) + assert_array_equal(clf.train_test_samples_[0][1], other_clf.train_test_samples_[0][1]) diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py index d2f36c1e4..2d36c1e22 100644 --- a/sktree/stats/utils.py +++ b/sktree/stats/utils.py @@ -12,7 +12,6 @@ from sklearn.utils.validation import check_X_y from sktree._lib.sklearn.ensemble._forest import ForestClassifier -from sktree._lib.sklearn.tree import DecisionTreeClassifier def _mutual_information(y_true: ArrayLike, y_pred_proba: ArrayLike) -> float: @@ -104,41 +103,6 @@ def _non_nan_samples(posterior_arr: ArrayLike) -> ArrayLike: return nonnan_indices -def train_tree( - tree: DecisionTreeClassifier, - X: ArrayLike, - y: ArrayLike, - covariate_index: ArrayLike = None, -) -> ArrayLike: - """Compute the posterior from each tree on the "OOB" samples. - - Parameters - ---------- - tree : DecisionTreeClassifier - The tree to compute the posterior from. - X : ArrayLike of shape (n_samples, n_features) - The data matrix. - y : ArrayLike of shape (n_samples, n_outputs) - The output matrix. - covariate_index : ArrayLike of shape (n_covariates,), optional - The indices of the covariates to permute, by default None, which - does not permute any columns. - """ - # seed the random number generator using each tree's random seed(?) 
- rng = np.random.default_rng(tree.random_state) - - indices = np.arange(X.shape[0], dtype=int) - - if covariate_index is not None: - # perform permutation of covariates - index_arr = rng.choice(indices, size=(X.shape[0], 1), replace=False, shuffle=True) - perm_X_cov = X[index_arr, covariate_index] - X[:, covariate_index] = perm_X_cov - - # individual tree permutation of y labels - tree.fit(X, y, check_input=False) - - def _compute_null_distribution_perm( X_train: ArrayLike, y_train: ArrayLike, @@ -279,17 +243,24 @@ def _compute_null_distribution_coleman( second_forest_samples = _non_nan_samples(second_forest_pred) # todo: is this step necessary? - non_nan_samples = np.intersect1d( - first_forest_samples, second_forest_samples, assume_unique=True - ) + # non_nan_samples = np.intersect1d( + # first_forest_samples, second_forest_samples, assume_unique=True + # ) # now average the posteriors over the trees for the non-nan samples - y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0) - y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0) + # y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0) + # y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0) + + # # compute two instances of the metric from the sampled trees + # first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half) + # second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half) + + y_pred_first_half = np.nanmean(first_forest_pred[:, first_forest_samples, :], axis=0) + y_pred_second_half = np.nanmean(second_forest_pred[:, second_forest_samples, :], axis=0) # compute two instances of the metric from the sampled trees - first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half) - second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half) + first_half_metric = metric_func(y_test[first_forest_samples, :], 
y_pred_first_half) + second_half_metric = metric_func(y_test[second_forest_samples, :], y_pred_second_half) metric_star[idx] = first_half_metric metric_star_pi[idx] = second_half_metric From be16e5a8dccb9d786580efb28df93526cd2a408e Mon Sep 17 00:00:00 2001 From: Sambit Panda Date: Wed, 4 Oct 2023 23:25:00 -0400 Subject: [PATCH 67/70] set default for covariate_index in ForestHT test --- sktree/stats/forestht.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index c49f28275..fa1d6950a 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -372,7 +372,7 @@ def test( self, X, y, - covariate_index: ArrayLike, + covariate_index: ArrayLike = None, metric: str = "mi", n_repeats: int = 1000, return_posteriors: bool = True, From 26b5b5feb7f1b19c81b7fb26d20bef1fb12049b3 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 5 Oct 2023 00:10:45 -0400 Subject: [PATCH 68/70] Add unit-test for small sample sizes Signed-off-by: Adam Li --- .../forest_ht_independent_data.ipynb | 66 +++++- sktree/stats/forestht.py | 38 ++- sktree/stats/tests/test_coleman.py | 191 +++++++++++++++ sktree/stats/tests/test_forestht.py | 223 +++--------------- 4 files changed, 317 insertions(+), 201 deletions(-) create mode 100644 sktree/stats/tests/test_coleman.py diff --git a/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb b/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb index b31e72003..fe8e153ce 100644 --- a/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb +++ b/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "id": "ad09a023-04db-4748-bfe8-1ec34bc83061", "metadata": {}, "outputs": [], @@ -36,10 +36,19 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "id": "91b2f4e9-b4df-4ff0-9e99-dc9a68ebc62c", "metadata": {}, - "outputs": [], + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2" @@ -47,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "id": "fa105af9-f0b6-4fbe-9d91-8b2a5493593a", "metadata": {}, "outputs": [], @@ -61,8 +70,8 @@ "test_size = 1.0 / 6\n", "metric = \"mse\"\n", "n_repeats = 500\n", - "permute_per_tree = True\n", - "sample_dataset_per_tree = True\n", + "permute_per_tree = False\n", + "sample_dataset_per_tree = False\n", "\n", "rng = np.random.default_rng(seed)\n", "\n", @@ -104,10 +113,9 @@ " y = rng.binomial(1, 0.5, size=n_samples) # .reshape(-1, 1)\n", " stat, pvalue = est.test(X, y, covariate_index=[0], metric=\"mi\")\n", " est.reset()\n", - " \n", + "\n", " stats.append(stat)\n", - " pvalues.append(pvalue)\n", - " " + " pvalues.append(pvalue)" ] }, { @@ -116,6 +124,46 @@ "id": "ff616595-bad5-4845-94b9-713c11dc5745", "metadata": {}, "outputs": [], + "source": [ + "plt.plot(pvalues)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c4569a24-4b12-4266-8d6c-7e740737c59f", + "metadata": {}, + "outputs": [], + "source": [ + "stats = []\n", + "pvalues = []\n", + "\n", + "for idx in range(500):\n", + " X = rng.standard_normal(size=(n_samples, n_features))\n", + " y = rng.binomial(1, 0.5, size=n_samples) # .reshape(-1, 1)\n", + " stat, pvalue = est.test(X, y, covariate_index=[0], metric=\"mi\")\n", + " est.reset()\n", + "\n", + " stats.append(stat)\n", + " pvalues.append(pvalue)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2da6a320-605a-40d6-9f8f-1aa1ff03110e", + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(pvalues, \"x\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "42ecee87-31b9-4c52-b867-348569050142", + "metadata": {}, + "outputs": [], "source": [] } ], diff --git 
a/sktree/stats/forestht.py b/sktree/stats/forestht.py index fa1d6950a..122b5f377 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -224,9 +224,9 @@ def _check_input(self, X: ArrayLike, y: ArrayLike, covariate_index: ArrayLike = if not all(isinstance(idx, (np.integer, int)) for idx in covariate_index): raise RuntimeError("Not all covariate_index are integer indices") - if self.test_size * X.shape[0] < 2: + if self.test_size * X.shape[0] < 5: raise RuntimeError( - f"There are less than 2 testing samples used with " + f"There are less than 5 testing samples used with " f"test_size={self.test_size} for X ({X.shape})." ) @@ -619,6 +619,40 @@ def statistic( check_input: bool = True, **metric_kwargs, ): + """Compute the test statistic. + + Parameters + ---------- + X : ArrayLike of shape (n_samples, n_features) + The data matrix. + y : ArrayLike of shape (n_samples, n_outputs) + The target matrix. + covariate_index : ArrayLike, optional of shape (n_covariates,) + The index array of covariates to shuffle, by default None. + metric : str, optional + The metric to compute, by default "mse". + return_posteriors : bool, optional + Whether or not to return the posteriors, by default False. + check_input : bool, optional + Whether or not to check the input, by default True. + **metric_kwargs : dict, optional + Additional keyword arguments to pass to the metric function. + + Returns + ------- + stat : float + The test statistic. + posterior_final : ArrayLike of shape (n_estimators, n_samples_final, n_outputs) or + (n_estimators, n_samples_final), optional + If ``return_posteriors`` is True, then the posterior probabilities of the + samples used in the final test. ``n_samples_final`` is equal to ``n_samples`` + if all samples are encountered in the test set of at least one tree in the + posterior computation. + samples : ArrayLike of shape (n_samples_final,), optional + The indices of the samples used in the final test. 
``n_samples_final`` is + equal to ``n_samples`` if all samples are encountered in the test set of at + least one tree in the posterior computation. + """ return super().statistic( X, y, covariate_index, metric, return_posteriors, check_input, **metric_kwargs ) diff --git a/sktree/stats/tests/test_coleman.py b/sktree/stats/tests/test_coleman.py new file mode 100644 index 000000000..58c6d6f73 --- /dev/null +++ b/sktree/stats/tests/test_coleman.py @@ -0,0 +1,191 @@ +import numpy as np +import pytest +from flaky import flaky +from scipy.special import expit + +from sktree import RandomForestClassifier, RandomForestRegressor +from sktree.stats import ( + FeatureImportanceForestClassifier, + FeatureImportanceForestRegressor, + PermutationForestClassifier, + PermutationForestRegressor, +) + +seed = 12345 +rng = np.random.default_rng(seed) + + +@flaky(max_runs=3) +@pytest.mark.slowtest +@pytest.mark.parametrize( + "hypotester, model_kwargs, n_samples, n_repeats, test_size", + [ + [ + PermutationForestRegressor, + { + "estimator": RandomForestRegressor( + max_features="sqrt", + random_state=seed, + n_estimators=75, + n_jobs=-1, + ), + "random_state": seed, + }, + 300, + 50, + 0.1, + ], + [ + FeatureImportanceForestRegressor, + { + "estimator": RandomForestRegressor( + max_features="sqrt", + random_state=seed, + n_estimators=125, + n_jobs=-1, + ), + "random_state": seed, + "permute_per_tree": True, + "sample_dataset_per_tree": True, + }, + 300, # n_samples + 1000, # n_repeats + 0.2, # test_size + ], + ], +) +def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size): + r"""Test hypothesis testing forests using MSE from linear model simulation. + + See https://arxiv.org/pdf/1904.07830.pdf Figure 1. 
+ + Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon + """ + beta = 15.0 + sigma = 0.05 + metric = "mse" + + # sample covariates + X_15 = rng.uniform(0, 1, size=(n_samples, 5)) + X_610 = np.zeros((n_samples, 5)) + for idx in range(5): + X_610[:, idx] = np.argwhere( + rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) + )[:, 1] + X = np.concatenate((X_15, X_610), axis=1) + assert X.shape == (n_samples, 10) + + # sample noise + epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma) + + # compute final y of (n_samples,) + y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon + est = hypotester(test_size=test_size, **model_kwargs) + + # test for X_1 + stat, pvalue = est.test(X, y, [0], metric=metric, n_repeats=n_repeats) + print("X1: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for X_6 + stat, pvalue = est.test(X, y, [5], metric=metric, n_repeats=n_repeats) + print("X6: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for a few unimportant other X + for covariate_index in [1, 6]: + # test for X_2, X_7 + stat, pvalue = est.test(X, y, [covariate_index], metric=metric, n_repeats=n_repeats) + print("X2/7: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" + + +@flaky(max_runs=3) +@pytest.mark.slowtest +@pytest.mark.parametrize( + "hypotester, model_kwargs, n_samples, n_repeats, test_size", + [ + [ + PermutationForestClassifier, + { + "estimator": RandomForestClassifier( + max_features="sqrt", + random_state=seed, + n_estimators=50, + n_jobs=-1, + ), + "random_state": seed, + }, + 600, + 50, + 1.0 / 6, + ], + [ + # XXX: Currently does not work with permute and sample dataset per tree + FeatureImportanceForestClassifier, + { + "estimator": RandomForestClassifier( + max_features="sqrt", + random_state=seed, + n_estimators=100, + n_jobs=-1, + ), + "random_state": seed, + "permute_per_tree": False, + "sample_dataset_per_tree": False, + }, + 600, # n_samples + 1000, # n_repeats + 1.0 / 6, # test_size + ], + ], +) +def 
test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, test_size): + r"""Test MIGHT using MSE from linear model simulation. + + See https://arxiv.org/pdf/1904.07830.pdf Figure 1. + + P(Y = 1 | X) = expit(beta * \\sum_{j=2}^5 X_j) + """ + beta = 10.0 + metric = "mse" + + n = 100 # Number of time steps + ar_coefficient = 0.015 + + X = np.zeros((n_samples, n)) + for idx in range(n_samples): + # sample covariates + white_noise = rng.standard_normal(size=n) + + # Create an array to store the simulated AR(1) time series + ar1_series = np.zeros(n) + ar1_series[0] = white_noise[0] + + # Simulate the AR(1) process + for t in range(1, n): + ar1_series[t] = ar_coefficient * ar1_series[t - 1] + white_noise[t] + + X[idx, :] = ar1_series + + # now compute the output labels + y_proba = expit(beta * X[:, 1:5].sum(axis=1)) + assert y_proba.shape == (n_samples,) + y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) + + est = hypotester(test_size=test_size, **model_kwargs) + + # test for X_2 important + stat, pvalue = est.test(X.copy(), y.copy(), [1], n_repeats=n_repeats, metric=metric) + print("X2: ", pvalue) + assert pvalue < 0.05, f"pvalue: {pvalue}" + + # test for X_1 unimportant + stat, pvalue = est.test(X.copy(), y.copy(), [0], n_repeats=n_repeats, metric=metric) + print("X1: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" + + # test for X_500 unimportant + stat, pvalue = est.test(X.copy(), y.copy(), [n - 1], n_repeats=n_repeats, metric=metric) + print("X500: ", pvalue) + assert pvalue > 0.05, f"pvalue: {pvalue}" diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 37a46173a..de7a4eb5e 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -6,17 +6,11 @@ from flaky import flaky from joblib import Parallel, delayed from numpy.testing import assert_array_equal -from scipy.special import expit from sklearn import datasets from sktree import HonestForestClassifier, 
RandomForestClassifier, RandomForestRegressor from sktree._lib.sklearn.tree import DecisionTreeClassifier -from sktree.stats import ( - FeatureImportanceForestClassifier, - FeatureImportanceForestRegressor, - PermutationForestClassifier, - PermutationForestRegressor, -) +from sktree.stats import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor from sktree.stats.utils import _non_nan_samples from sktree.tree import ObliqueDecisionTreeClassifier @@ -80,198 +74,23 @@ def test_featureimportance_forest_errors(): estimator=RandomForestClassifier( n_estimators=10, ), + test_size=0.5, permute_per_tree=permute_per_tree, sample_dataset_per_tree=sample_dataset_per_tree, ) with pytest.raises(RuntimeError, match="The estimator must be fitted"): est.train_test_samples_ - with pytest.raises(RuntimeError, match="There are less than 2 testing samples"): + with pytest.raises(RuntimeError, match="There are less than 5 testing samples"): est.statistic(iris_X[:5], iris_y[:5]) - est = FeatureImportanceForestClassifier(estimator=RandomForestRegressor) + est = FeatureImportanceForestClassifier(estimator=RandomForestRegressor, test_size=0.5) with pytest.raises(RuntimeError, match="Estimator must be"): - est.statistic(iris_X[:10], iris_y[:10]) + est.statistic(iris_X[:20], iris_y[:20]) - est = FeatureImportanceForestRegressor(estimator=RandomForestClassifier) + est = FeatureImportanceForestRegressor(estimator=RandomForestClassifier, test_size=0.5) with pytest.raises(RuntimeError, match="Estimator must be"): - est.statistic(iris_X[:10], iris_y[:10]) - - -@flaky(max_runs=3) -@pytest.mark.slowtest -@pytest.mark.parametrize( - "hypotester, model_kwargs, n_samples, n_repeats, test_size", - [ - [ - PermutationForestRegressor, - { - "estimator": RandomForestRegressor( - max_features="sqrt", - random_state=seed, - n_estimators=75, - n_jobs=-1, - ), - "random_state": seed, - }, - 300, - 50, - 0.1, - ], - [ - FeatureImportanceForestRegressor, - { - "estimator": 
RandomForestRegressor( - max_features="sqrt", - random_state=seed, - n_estimators=125, - n_jobs=-1, - ), - "random_state": seed, - "permute_per_tree": True, - "sample_dataset_per_tree": True, - }, - 300, # n_samples - 1000, # n_repeats - 0.2, # test_size - ], - ], -) -def test_linear_model(hypotester, model_kwargs, n_samples, n_repeats, test_size): - r"""Test hypothesis testing forests using MSE from linear model simulation. - - See https://arxiv.org/pdf/1904.07830.pdf Figure 1. - - Y = Beta * X_1 + Beta * I(X_6 = 2) + \epsilon - """ - beta = 15.0 - sigma = 0.05 - metric = "mse" - - # sample covariates - X_15 = rng.uniform(0, 1, size=(n_samples, 5)) - X_610 = np.zeros((n_samples, 5)) - for idx in range(5): - X_610[:, idx] = np.argwhere( - rng.multinomial(1, [1.0 / 3, 1.0 / 3, 1.0 / 3], size=(n_samples,)) - )[:, 1] - X = np.concatenate((X_15, X_610), axis=1) - assert X.shape == (n_samples, 10) - - # sample noise - epsilon = rng.normal(size=n_samples, loc=0.0, scale=sigma) - - # compute final y of (n_samples,) - y = beta * X[:, 0] + (beta * (X[:, 5] == 2.0)) + epsilon - est = hypotester(test_size=test_size, **model_kwargs) - - # test for X_1 - stat, pvalue = est.test(X, y, [0], metric=metric, n_repeats=n_repeats) - print("X1: ", pvalue) - assert pvalue < 0.05, f"pvalue: {pvalue}" - - # test for X_6 - stat, pvalue = est.test(X, y, [5], metric=metric, n_repeats=n_repeats) - print("X6: ", pvalue) - assert pvalue < 0.05, f"pvalue: {pvalue}" - - # test for a few unimportant other X - for covariate_index in [1, 6]: - # test for X_2, X_7 - stat, pvalue = est.test(X, y, [covariate_index], metric=metric, n_repeats=n_repeats) - print("X2/7: ", pvalue) - assert pvalue > 0.05, f"pvalue: {pvalue}" - - -@flaky(max_runs=3) -@pytest.mark.slowtest -@pytest.mark.parametrize( - "hypotester, model_kwargs, n_samples, n_repeats, test_size", - [ - [ - PermutationForestClassifier, - { - "estimator": RandomForestClassifier( - max_features="sqrt", - random_state=seed, - n_estimators=50, - 
n_jobs=-1, - ), - "random_state": seed, - }, - 600, - 50, - 1.0 / 6, - ], - [ - # XXX: Currently does not work with permute and sample dataset per tree - FeatureImportanceForestClassifier, - { - "estimator": RandomForestClassifier( - max_features="sqrt", - random_state=seed, - n_estimators=100, - n_jobs=-1, - ), - "random_state": seed, - "permute_per_tree": False, - "sample_dataset_per_tree": False, - }, - 600, # n_samples - 1000, # n_repeats - 1.0 / 6, # test_size - ], - ], -) -def test_correlated_logit_model(hypotester, model_kwargs, n_samples, n_repeats, test_size): - r"""Test MIGHT using MSE from linear model simulation. - - See https://arxiv.org/pdf/1904.07830.pdf Figure 1. - - P(Y = 1 | X) = expit(beta * \\sum_{j=2}^5 X_j) - """ - beta = 10.0 - metric = "mse" - - n = 100 # Number of time steps - ar_coefficient = 0.015 - - X = np.zeros((n_samples, n)) - for idx in range(n_samples): - # sample covariates - white_noise = rng.standard_normal(size=n) - - # Create an array to store the simulated AR(1) time series - ar1_series = np.zeros(n) - ar1_series[0] = white_noise[0] - - # Simulate the AR(1) process - for t in range(1, n): - ar1_series[t] = ar_coefficient * ar1_series[t - 1] + white_noise[t] - - X[idx, :] = ar1_series - - # now compute the output labels - y_proba = expit(beta * X[:, 1:5].sum(axis=1)) - assert y_proba.shape == (n_samples,) - y = rng.binomial(1, y_proba, size=n_samples) # .reshape(-1, 1) - - est = hypotester(test_size=test_size, **model_kwargs) - - # test for X_2 important - stat, pvalue = est.test(X.copy(), y.copy(), [1], n_repeats=n_repeats, metric=metric) - print("X2: ", pvalue) - assert pvalue < 0.05, f"pvalue: {pvalue}" - - # test for X_1 unimportant - stat, pvalue = est.test(X.copy(), y.copy(), [0], n_repeats=n_repeats, metric=metric) - print("X1: ", pvalue) - assert pvalue > 0.05, f"pvalue: {pvalue}" - - # test for X_500 unimportant - stat, pvalue = est.test(X.copy(), y.copy(), [n - 1], n_repeats=n_repeats, metric=metric) - print("X500: 
", pvalue) - assert pvalue > 0.05, f"pvalue: {pvalue}" + est.statistic(iris_X[:20], iris_y[:20]) @flaky(max_runs=2) @@ -398,10 +217,10 @@ def test_forestht_check_inputs(forest_hyppo): @pytest.mark.parametrize("backend", ["loky", "threading"]) -@pytest.mark.parametrize("n_jobs", [1, -1]) +@pytest.mark.parametrize("n_jobs", [1, 2]) def test_parallelization(backend, n_jobs): """Test parallelization of training forests.""" - n_samples = 100 + n_samples = 20 n_features = 5 X = rng.uniform(size=(n_samples, n_features)) y = rng.integers(0, 2, size=n_samples) # Binary classification @@ -529,3 +348,27 @@ def test_permute_per_tree_samples_consistency_with_sklearnforest(): assert_array_equal(clf.train_test_samples_[0][0], other_clf.train_test_samples_[0][0]) assert_array_equal(clf.train_test_samples_[0][1], other_clf.train_test_samples_[0][1]) + + +def test_small_dataset(): + n_samples = 32 + n_features = 5 + X = rng.uniform(size=(n_samples, n_features)) + y = rng.integers(0, 2, size=n_samples) # Binary classification + + clf = FeatureImportanceForestClassifier( + estimator=HonestForestClassifier( + n_estimators=10, random_state=seed, n_jobs=1, honest_fraction=0.5 + ), + test_size=0.2, + permute_per_tree=False, + sample_dataset_per_tree=False, + ) + stat, pvalue = clf.test(X, y, covariate_index=[1, 2], metric="mi") + assert ~np.isnan(pvalue) + assert ~np.isnan(stat) + assert pvalue > 0.05 + + stat, pvalue = clf.test(X, y, metric="mi") + assert stat == 0.0 + assert pvalue > 0.05 From 80a4304c5fc556e34d878b8f3188e72be2bee318 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 5 Oct 2023 10:29:06 -0400 Subject: [PATCH 69/70] Final commit Signed-off-by: Adam Li --- .../forest_ht_independent_data.ipynb | 264 +++++++++++++++++- sktree/conftest.py | 20 +- sktree/stats/tests/meson.build | 3 +- sktree/stats/tests/test_forestht.py | 19 ++ 4 files changed, 283 insertions(+), 23 deletions(-) diff --git a/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb 
b/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb index fe8e153ce..38e3088b6 100644 --- a/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb +++ b/benchmarks_nonasv/notebooks/forest_ht_independent_data.ipynb @@ -100,7 +100,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, + "id": "560b27ff-26a1-45af-98d8-f2b4ac3c0c70", + "metadata": {}, + "outputs": [], + "source": [ + "permute_per_tree = True\n", + "sample_dataset_per_tree = True\n", + "\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "# initialize hypothesis tester\n", + "est = FeatureImportanceForestClassifier(\n", + " RandomForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " ),\n", + " random_state=seed,\n", + " test_size=test_size,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "id": "3b53a9ec-befb-471a-9a44-681bbd86b31b", "metadata": {}, "outputs": [], @@ -120,17 +147,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "ff616595-bad5-4845-94b9-713c11dc5745", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "plt.plot(pvalues)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, + "id": "cdf6ee7b-8ab5-4abd-af46-63e5889a7baf", + "metadata": {}, + "outputs": [], + "source": [ + "permute_per_tree = False\n", + "sample_dataset_per_tree = False\n", + "\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "# initialize hypothesis tester\n", + "est = FeatureImportanceForestClassifier(\n", + " RandomForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " ),\n", + " random_state=seed,\n", + " test_size=test_size,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "c4569a24-4b12-4266-8d6c-7e740737c59f", "metadata": {}, "outputs": [], @@ -150,10 +225,187 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "2da6a320-605a-40d6-9f8f-1aa1ff03110e", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiwAAAGdCAYAAAAxCSikAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABeLklEQVR4nO3deXRV1dk/8O9NMCQWEoZIMBCExFCqtqAMIfi2b5cJpeLra62rxer7SvFXfLEgSrqqQRGHLo0u2zBaB/raurS8YOvQLgfaNFQrMsjYOhIgCjEaMAESRJLAvef3R9gnz9l3nzvlJvfc8P2s5ZLce4Z99tln3+fsvc8+PsuyLBARERF5WEqiE0BEREQUDgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyvT6ITEC+BQACffvop+vfvD5/Pl+jkEBERUQQsy8KxY8eQm5uLlBT3dpReE7B8+umnyMvLS3QyiIiIKAZ1dXUYPny46/e9JmDp378/gI4DzszMTHBqiIiIKBItLS3Iy8uzf8fd9JqARXUDZWZmMmAhIiJKMuGGc3DQLREREXkeAxYiIiLyPAYsRERE5HkMWIiIiMjzGLAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BCxEREXle1DPd/uMf/8AjjzyC7du347PPPsOLL76I733veyHXef3111FWVob33nsPeXl5WLRoEX784x87lnn00UfxyCOPoKGhAWPHjsWKFSswadKkaJPXrZZU1SA1xQd/wEJqig/zSwrtzwDYn/sDFhZMHY3l1XvgD1gAYC+vC7WM3PZbextx6fnZ9vdqvQVTR+PaJzcBANbcVByUxo37GgEARaMGY+vHh+3PJ44c5FjOH7Cw5aMm1B3+EjMmjrD3I9OwcV8jikYNxoKpowHAsV9TmlX69GOVn6nl9f3NLykMWt60frTbUGkuGjU4KL/Vd4pcRm1H5pV+7Pp21HcyPfq52vrxYUwcOchRXvR/R3uc+nYlfbktHzVhSkG2oyz7Axa2fnwYAcuyz7dK95SCbLy1txE+H+z1llfvsT/75MgJNJ84iQtzM1E0ajC2fNSEFJ8PAaujjNcd/hIAcN7gryBgWbAs2OX62ic34ZMjJ3DNJcODrh91PAAcaZb7VvspGjUYAMKWW1MZ0M+znn/hzoN+7lX6Lj0/G2u3HoDP58OGOy4Luk7UtTk5v/M6lXmsp00/pmjLiF4W3I5R1nUyj97a24j6oycwfGCGXZbVedm4rxHvfdpilwFZllTaLQv29uXkplMKsu082fJRE977tAVZGWdh+MAM+3hV+ZblU0+brCvV8Wz5qAmfHDlhb0umV32u0iXTK69JuW09r03XryqHet0fzTUa7rfDdP5MZUEy1d2RbjeRog5Yjh8/jrFjx+LGG2/E97///bDLf/TRR7jiiiswZ84c/P73v0d1dTV+8pOf4Nxzz8W0adMAAGvXrkVZWRkef/xxFBUVYenSpZg2bRp2796NIUOGRH9U3SQ1xYfKqhpMKRiMjfuaHJ8BsD8vO11gK6tqUHb6pKtl9As/1DL6ttW/1bJqP5trD9vbM6URAOqPnEDdkRPITO+DltZTqDv8JeqOnLCXm1Iw2N6OTIdMQ0c6su19yf2a0lymVSTyWPU81fe3ubbJzstQ60e7DZXmFJ/Pzh9VmavvFLWM2o4pr+Sxu+WLTI/8buvHh+00yOPral7J7Ur6dlNTfNhce9hOk9pe3sAM1B05AaDjfMtj2lx72M6HjnWdeaNsrj2MFJ8vKE+V+qOtp7c/OChvtn582JFWtQ8AmJw/2E6z275VGQ1Xbk1lQF47prIW7jzI86vSoF+7163ajMn5nZ+p/J5SMNg+d+ozmX/6da2OKdK0RXo9ye3I/ck8yhuYgU+OnMAnR07Y9Y4sS0BHGVD1jp7f6vhkWVPrdBxb5zV2rPUUPjlywj5eWb5lHsjzZ6pL1fY
+OXLCLpsyvZ9o6VHpldek3LZeN7pdv6a6P5prNJLfDpNQ64aqv8JtN5GiDlguv/xyXH755REv//jjj2PUqFH41a9+BQD42te+hg0bNmDJkiV2wFJZWYnZs2dj1qxZ9jqvvPIKnnrqKZSXl0ebxG6jTqwsuFMKBtvfywpBnXRZGGThkQXDbRlpcv5gRyWn70f+u2zqaMeFpC5CFaxkpvexKwtZ4cvtyPWVKQWDg9IeKs2K27Hqear+lj9EofIq1m2oZeWPpf7Dp/6t8ihUXql9hsoXuQ95HtX2wpWdaI5TplNVSKY8lNtT3wUHHsF5I8uiPCa9AjZVyPr2J+cPdixryg+5/8n5g+3yHWrf+j6jLQNuZS2S82C6TuUP9cZ9TXZLEwA7WFHrycB4476moLKithuPayGSY3TLI5k+veyoOkcGI/o51v8vuZ1PVb5Nn8tjUn/LY9G3rwdLAILSa7om5b5kXRtN3R/LNSrXD3f+Ilk31u0mks+yTrfVxrKyzxe2S+hb3/oWLrnkEixdutT+7Le//S1uu+02NDc3o729HWeffTb++Mc/OrYzc+ZMHD16FH/605+M221ra0NbW5v9t3rbY3Nzc9xffmhq+qusqoEPgAXY/weAFB8QsDouStWErda9btVmbNzXhFSfD37LwvCBGXaTqtruxn2NjqhfVmRpqSlo9wcAsU9ZuFS65HJqX6a0yu/lxfjctjp8ol3I6uJS2x4+MAM/nJAHf8DC8zs+wSdHTtjf9U/viIOPtZ5yrD9x5CC7KVcqGjXY3obMG7nN4QMzMGxAht30rzelXvvkJuzYfwTtfsteR6VZbXNy/iBHXstzKPNBPwf6MsMHZji6LSqrauzzPjl/kKO7Qu5DVZBqWbltAHa6M9P74GvnZqL+aMc50LtI1N2ZSo86Tln2Vs+ebO/flG6VB6rJ3xSg6sc9OX+QfdepPtOZfnxM3JaTZVeeR9WFKY9HZ/oBUkG62pYs5+o8m8qAqQtFp19v+jUij0HmnYna97AB6cgbdLa9rH69Ah1lTHW1uHUpqGNT+1d5I9Mju19MXbcAgsqa+n/ewAx8/5Lh2PJRE9xa0UzktZA7ION0F5F7eQLMZUVep/LcqvIMwFiXBiwrbHpNadGvnX97eL2jvgKc5dUfsHDp+Z35q8quzP81NxXj2ic3BR2Dqs9lt42+nNyH6vaX3Tuye/u9T1twTFwD8npSdb1MV2d+ObvVVLkCYOyO7IqWlhZkZWWF/f3u9kG3DQ0NyMnJcXyWk5ODlpYWnDhxAo2NjfD7/cZlGhoaXLdbUVGBrKws+7+8vLxuST/Q2bSmmnnnlxQiLTXFrkgsdBTWtNQU+4do474mpKb47HVVsOID7ADik9NNvsur94gmx2zI91WqC1VWgmqfqT5n/6RKV7s/YKdHXUwyrZLf6viBlz+w6kJU0lJTsHr2ZHvbqT4fPjlyApVVHf2wKrhR6TvWesoRrKjjeGHHJ6IrofM/uQ1VKf5wQp5jm58cOWE3627c14StH3dWOqqJs91v2euoNMsKRTYf6+dQ5oPe1C6XkWmR21HnPcXXWVbmlxQ6fnDqTudrwAretjzWltZT+OCzFrvJXXaRyKZklWZ1bmTZs7ujxPHLdNvBzOlgOi21sypQZUfPm47+/s4ynOrzOcpJqs9npyUUmWZFbUWWXVXW1LUkjyfV5wtav04rtyovU30+e7vOrp9spPp8xjIg88eNfr3Ja0Qeg553JuocnTf4K3YZl2mTphRk2+VAXgcAHMcm81DljUyPalmTxyiP2xRMqLqr7nS5NHVLhXrfrrwWPj16ImywIq9j+Zm8TuW5VfWtzAOlo1UjO2z5VOVL0q8dvb5S6ZDlVeavLCtAZ11kOga1D1U/Lq/e41hO34dcTl93c+1huy6W6279+LCjrpflQq0nf/NUuVDfhbouulPUXUJesXDhQpSVldl/qxaW7qA3rQFwBA/yb/3ik02y6i5H3mX8YEKeo0lxc21
TUEuI6e5X7Uf2QS6v3mMXvEhbWFTaVUAFBN/RqO/ltmXTvX5na7rTBWD8zNQs7Lcs/GFbXdBysulf/SjL/JH7bfcH8M2H17tWhCqv5Plq9wdOX+yhWwhU07rcjsoXdSyVVTX4w7Y6R16ru/1QFbRcTlH5rJ8XtU/93Kg82Vzb5NhP3sAMY9OvOgbFVJbVZ7IM68fgtyw7LaHINCumwE22INktHuIu3++3HOvr+SbTqyplVc5Vk7h+bejLhGoa18+96Tw4f6TCtz7J7jaZNknvmjF1Kah81FtYVD6YyripS0DtS68/VNeWSagmez2Ad6srFNN1rK5TfTn9s821TUHX+B+21YUtn0Bw2VZkV5NpWVVe9W5K/Rozkcdg6roMtY9Q3dsyj1XrmF7Xy3Iqu7nkNpVEdhklbZeQLtImpUiZRlibCqupElLdGUBnpasqU3XBquZ7eTHJZb92bqZ9h2OiF2bV1Gnq0pBjV1paT6FvnxS0nQoYK3h9HMeBw1/ax6KagdXdXbgKOFxlpHeV6P2q4Y7blG635mO1zvCBGRgx6OywfehuaTel1dRvrlfwpryORKiul9WzJ9tN0+pvGXia0i2DANltpPLpN2/WOtJpyhv9WDLT++CiYVkRdQWZgtu80+fD9L26ltT/I23Wl+nWrz3Zzaq2qx+T7PI0PSmmrjHVtK/Og9tNiJ4WU76o9WT9Eer43Lr9fii288MJefb+9XyQy5q6JOQAW/1GK5Jgw02s66kfbVnOphQMxjv1zY4WXT3P1XJ6+VRPAMWa3rKpo43d53KfelnV621V/5iWU+dBH0ogmbppZTmX9bdOXlOyyzVU92V3BSue6RIqLi5GdXW147OqqioUF3f0t6WlpWH8+PGOZQKBAKqrq+1lEkHvBjIpmzraHjQoycYyVcBUJaV+xNR1IpvW5bKpKT7jtlN9PkcELJ/YAOB4UkGRwQoAtJ3quOMyVZx65C0Le92RE3a3TiR3iz+YkBey+VXeaclWC9Mxh6KaOcumjg5qPlY6mpAH45MjJ+xWIbWOKZ9VmiSf+FwOVJUD5FSFJLvgUn2+oLxWgxTDHVeocSLXrdpsnx/1tzo2KS01BW/ecZmjQlPL6z+sejon5w8OSqe+jHrqzJROnWmQoypPpu9HDDobABzHGckYBHlO9WtPDmRV2/3JN/Md5UZ2eaouXfl359Ms2Y4ugrrT68kBmfI6cQta646csFsVTT8weomWLYyAs7tS/Riqf6v9y2BF7kd2XenBypSCwXagq8qyKVgJd43qx6qX0Uiolkb9M737WbYKhRJJsAJ01GOmbW2ubQoZWG7c14R6w/eqLpL1kFtXlRqgq65d1dWo1B8J7laT5Vzv3pfkNSW7eGSXpCTLR6JEHbB88cUX2LVrF3bt2gWg47HlXbt24cCBAwA6umpuuOEGe/k5c+agtrYWt99+Oz788EP8+te/xnPPPYcFCxbYy5SVlWHVqlV4+umn8cEHH+Dmm2/G8ePH7aeGEkH9+Mig5Tmtm2JzbRP8Acu++NTpVU/kSKpAyDtuxW9ZQU33q2dPtgc4SX7LwsZ9jacLfCMmjhyE4af3ry4Cfd/DB2bggtyOqHXYgHRMKRiM718y3PiDkpbq054IGeT4cZWPQofzXJjmV3k5qB+Y37xZazxmtexz2+qMTbLDT3d3XLdqc9CP/MZ9jVhSVYPJ+YPtvKo7fde+ubbJsT21n8z0PtjfdNyxHXXXMnHkIEzOH2QPULv2yU1Y4tIqJAcpSptrm4IGH4c6bunA4S8xOX+Q/UMvn0SZnD8IzSdOOpZX3RUqOJZN+sMHZjjWzxuYgWED0gEA/dP7YO3WA0HBSFqqM1WmO9DhAzPsfOpo0ej4txqQrbbQX5TVYQPSg4733fpmOz1u+wc6ryfVVfXctjq77MrvLLG+Kguye0hW0v3T+9jXk/y/Wk/9f+3WA/a+FP3anVIwGMMGpAddm0rfPimOeqO/tpyppJiuheGinMn0TCk
YjMz0Po7tqDqk3R+wu8OWV+/BkqoabNzXiLKpozFx5CDkDcywy6K61nIHZNhpjaSLE3Be70e1MipNKRjsOA6gI3/kMcp81Ouxd+qb7WNV57Oj20P/AfadvmEZZCxTym/erA3K/1B1oKxD6kRQCHS0rFz75Cbsbzpufz85f5Cj+woAnttadzrdjeif3gd+y0KKr+McqeAS6Pyt0fNftaBNKRgc8twoqizJsq2vZ+p262lRByzbtm3DxRdfjIsvvhhAR7Bx8cUXY/HixQCAzz77zA5eAGDUqFF45ZVXUFVVhbFjx+JXv/oVfvOb39iPNAPAjBkz8Mtf/hKLFy/GuHHjsGvXLqxbty5oIG5Pk0FLwcJX7eazjx+6wtF/rH7EP3roCruS0O+kVDSvikBL6ylMKRgc9EOmKv/rVm123EnK5VQErAY/bbjjstNdQZ13UrLiPdTSZncXvVVe0tEKIbqbZEDS7rccn6+5qRjzSwqxevbkoEc03ahthWvWVj+cQMeFPKr8FTvf9BYI/a5Q7Uet/8mRE/jmw+sdLUQyv9Sd8Q8n5NnHoVpb5PY+On1uW1pP2XOFyLxXdyNrbiq2By6q8yDvStX2VLBSZyg76vzqZUBWzrLKkMdaL+6OVq7f6zhWmYf6o8H7Kqbbd81qLo2V6/c67qZnTByBsqmjcex0HqhlO8dGdKZKtTqp/X18+hpQgxTX3FSM1bMn2/l1rPUUyqaOtq+VY62n7OCp/mirfbwqD+V5UOT+Vb6o60mdPzXXhtrGlILBYpxK56B3vWtvX8V0+zN1594i/q/WU+ehsqoG9Udb7ScqVLrVoEWVL6tnT8aMiSPstOjnve1U54DrvIEZ9r7Lpo52LCfLtbwW1LlW3VIb9zVh2d86f1w27utsPVPnUpVBlTcqH7Z+3Dk/ydaPD9vnX16DqSk++1jkD5sekKlzo9ZX6ddbReTxqZYBeawqf9T+5bGollJ53lTa5PlU512t1+63sLm2o3VDL1Myr011kqw39JYIVc7kejUPXG6nY3PtYdQfbbXTqLpf8kSQ8MnRzt8AlVfDBnSWg3mXnR+UPkld25HcWALOMq2uHz0fAITtdehuUQcs3/72t2FZVtB/v/vd7wAAv/vd7/D6668HrbNz5060tbVh3759QbPcAsC8efOwf/9+tLW1YcuWLSgqKorleOJOfzJh9ezJAIDVsyc7ximozyOlLkz9zvQHE/KConf1QxKuC0GSzYduzb1AZ5+k3pWiP4EEdD4VEeqYAPeAxtQMLI9LVRn6j6zbtlT+qe12TnY2OKgSk4NhAQQdh9qePgYkb2AG3jwdECqyi0CmV/ZLqx8pGRyoAFfvgpLpBpyVs/pcbketo/5tasUy9TWr/ct0qTtr2a02v6TQLvcyjfpATdOAUMDcOuk2x0TZ1NHG8QGmMqTfA3c81eDMX3X+1HlQ+1RP8OhddUDwvBmya0+nB1Tq33J+DdMTLaZJ6VQ3nU4GgJtrnWU8VFO9zE+3J7lUgCnPZahrRf37BxOcDzTI7gxFBifyWFRLgho7pNP3rUzOHxx0nZqOxa07We1LHqt6ovPNOy5zlBH9fMobQEmvJ+Xgc/l5qOtCUt1cspVSn9NG/S3Lgd7VKMmbCCmS3w9VDyjq2tHrv0QFLUn7lFBPWFJVg+d3fOJoDlUj8q9btdlR+V375CasuakY/kDnvBhuFfrwgRmYnN8x0Or9T1scEfKyv9VgaFb66WmiLXuafPlYtGruGz4wA5VVNVi5fm/QkxXqaQDA+TTJ6tmTT08B7pwvZOO+xqDR7tc+uQnyWXzTSHc5wEvNO6O6Unw+H5pPnERmeh+cN/gr8AcsNJ84GdSXvimoX7qjOV39iOjHDgCfirvugy2d8/Govlv9B1t1n8lJvSTVTy/3owYZA8FPiskuM6CjBU3NAzE5v2OOhOtWbbbLg3wVwvM7PnHknRr8OL+kY2p3vUzIViM5f0IkAxdVuoCOrouN+xrtAPWbD69
3PD2Qmd7Hca7lEzkyHYocPD6lYLBjTIA6bn/AsqdFd5vfRA3+C/UkG7TPVPkOiHlo9KBLzUUhn4SS+1Ddqyp/ZjyxKWhKd7e06GO9Rt/1muPpOUU1o+vlA+ioM0xP8KnxZuq6lgNs1SBP/cmmf3t4vV2G1BgOU1eNJdKstjE5f1DQJGfymtz28RE7X9X3QOePohygrOaGkXP7qJaEiSMHwR+wgs63OkdARxnNOz1uydSlLI9Hdm2q/Fdlw29ZWLu1o6VfddvLp6S++fB6fP+S4Thw+EvAAiaOHISAZdljQrZ9fARAR9dcVsZZ9nbUdaHSLs+bfr2o8mEaK6MvJ6/lpX+rQcCC/RsAwK5bQ/2uAMHl1e13xESfH0xS50fVzRv3NSZk+n6+/DAEOT9I8enotLKqxtj1oJ6DV10tqplVb1WQg+HqTzdtqq6Cjr7IjmnLfzghD2+Vlzju3IDOO4QNd1yGNTcVBz02qZ4AkReDaj5Ud6ALRFePfL4ecN7tyGfxZauMPCZZAaqnh2ZMHIG3ykuw4Y7L8M690zBj4gi7K0X9EC8Q+1EXvToGOWW2OvayqaMdF4epBUndQeuDw+aXFGLNTcVBF5cawCy3KfP4zTsuc6xjmq9EdQupJ0kWnP6xtCfcSulolZOfq2bXj0T3SWf5yEbL6W6Tjx+6Imicj9qOGoyqk4M9l1fvsZefX1KIGRNHOOb90ectaWk9hetWbXa0htxaGnznKte5rXS0fTcqB+7Zc3icns9Dnk+VX3LeH9VN5Db3iCTLeceTOoPtOYfkuV9zUzHW3FTseNpFXZOq3Kop/lWaZOuZuobdxp3IeWdkK5U6D1NEnSHLh/pMdsfI/+tzx8jWW1V25Dqq+V/O91Q2dTSKxCRgKk1AZytNZ2uRc34iOcBWb6E1XQMb7rjMPjZZ1tVy6hgWTB2N1BSf43zLsjq/pBBvlZdgzU3FSDk915Oel7IMAHAEIWpZ1Q1Uf7TVznv1ZN++iul28PLCjk+w4Y7LsKG84zqfUpAdNF/NO/dOww8n5NnzIanrQtYL8sddlUuVRnldyOtKdlXJvJTzKf1wQh5mTBxhH4MKdE3jadT/ZV2a6usYLqD/jpha9eQ1pV6DoOo2Of+Y6tpN1FwsbGFxoU+fLP9t6noAOu881OemgVn6dtQA2+XVexxP88hWAPlvVYGoQiTvQGRaZEAln+lXQYv+SCsQ3I0gn8VX5HgMfXp2vdvFrSlcbdMU+Mh0hepykcdumvdCzk8jz6napnNCOed5UhWWvg23+UrkU1uyYpDjnOTdjTpOtbw6Nvm+F7UdeQen9qE/sgltGRm0yH3orUT6cau7VdN5kneAch4It/3Jfenf69eLo+UlggGCcq4SvZtPP296+dP/L1+5IB8Ll69kcGvJ8lsWvvnw+qC5PmS3mtqe6XqW14S8VvTWADk3R7h11LGYysjm2ib7PMjWAHne5OBsNddNqDon1LUm54HR5xdxK5P6DZpKt34s+nw1OrldlSdq2/IpJ70uVPmrlyU5v4lMr+kRb1P51K9tE70OM107QHArityuPkGp2/GZqP3I3wxT3aafw57UpXlYvKQ752Exzb+iCo08caoJ2rI63/ipBrsCnVNFDx+QAQuWPThMnwNBNtVLcjptlR71mWoqVs3O8k3NUwqy7eY7/Zl+1aWhT8MMdE7FXHf4S9QfbQ2aw0MuI99sqk9DLdOkzy0gu0VU+lWer916APVHW4Mmq1N9+3qzPBD8Pg+5XzW9tf7jBHRWPHJuAtMPg74P/W/TFPDyc5l30rVPbnK8KVufBEyfH0WmWQaRG/d1PMWy4Y7Lgrpe1N/6dOoy/Spgll09Mp/3Nx23B8Gq9UK9YVo/fsA5Xb0qd/rYExN1nHJqenWNqflQ1P7U36a3Dutzl6juCvkmYL1LRU+XnCtlSkFH966aAl0elyzranvDBqRjxsQRxjchy3zQ58ZQ17H+NmbVDaNebyFlpvfBv+6dFpS/aqp
5WRZkfri9t8xUZtTfepAou+bUOTJN966/HVu++VntQ9YTQMfrKlTe9E/vgwtzMx0vK5R1spoxWr82VZ6Y6hf9GpSvxJB1v3zTtnzLs+kN57K7UZ+oU84lo+etLId6t6GsN9U6quyo9Orp1+c2knNLmX4z9OulO4KVSH+/GbBESPaRKmmpKah5IPhFkKZBrW6fq+3q29L359bK4DbgM1Q0XLDwVfsual/F9KiO3+2Yo1levwDUMqb0qu3Ii0Udrwxc9Dt+PU9CBSAyj02tQnp+u51PNebA7VyGyjs9QDH9e9nf9gRVXG532npgZNo+4CzD+nkJlTf656Ho51Dfr2zV0Fsv3f4d6TlxS1u4cyKvPz1YcQuqQv2Iy+25pcvteg11V6t/Z6o39B9J/YZDns9Unw+3lhaGbC12W18vr+HOSaR36/Jczbvs/JB1nf65aRuma9Ptx1hPo1vdaToWt+OTwYopGHT7XF3zat+mVhO39IerB92+V2L5zYhGpL/f7BIyML3sUA9W3LoNgI6mM322WXWXIe9K1Xb1bZn2p7YLdP4Qd0zN3GhvVw3uUv/XuxtU1C2bfNU+TTP7KmrKcTUPgD4YV+WRbNnRm4PlsnJgsmkZlX69WVk16y4RlYBaduLIQY71F4h8ls2z8iJVd3zqrkJvplbrvrW30Xh3KAeiqWPWz+W1T24yfi7zbIGoHFS6TIGLPuBQPh0h06zupGR50u94geAuFNn0rY+NMrX6qIF3ocgyrk+vft2qzae309misOWjJqT4fI7Bwuo8yIHT/oAVNDuonj63tOnlym1wuWpl2t903FFW1F3q8IEZ9l39sAHp9jWm7lRlF15E08EHzHevcgCzyeT8QfY1Hmo/E0cOwoHDXzrm/5HnHOgoY2ouJPmjtuof+5CW6sOQzHR7AKhc/0DTl3b69GNwds80RtW1oJ8r2a2ll8vhontd5pX+ygS9ntXrF0leV/pAdL27RdXB8npSx63YrexioP2SKueNxPCBGfZ1LKlgRX8li7oWZHr19L+1t9FO3+T84Hrjrb2NGD4wA4HTQZvsBlKvRtAfPEkEBiwG+ttr9f5KoLMfXx8noMjmafkUj1rWFNnKvk7AGWXr+6msqsG2j48EdT9s3NfkeHRQVkib9jUZx4rox+zWRK1++OWgWNOxqLTrd+Rqu2pAZqhl9ApN/a0Gkir6QFpTRSnPo7qjMvWjR7od03Ju51Llk15e1DplIv1yWzv2H7XTqT5T+ar+lkGJnk697Ml8VAGuqcXEdF427muKON8kt35zOS5M/a3fsZt+yEK1SujHGS5N+jHKwEdviao/2mp31QEdg9E/OXICh1ra7H3K63b17MmurVFqm6bjcXviQl0v+iPB8lj0qdRVHst9qUGv6jqSdYas29SjyTJ9x9r8ADpmHta7/MKNzZDH6nYNmujnSm+ZkNsCOgYk6z+meqtS5/kOHi8m80VPu1taVMuDW11mClr049bPuyrPpu45NQjerQy55X2k50YNuFX5u6Sqxh4vsyBM+e0JDFgMTIUs1GBTtxM4v6TQ0U2gmjP1F1S5XZBye3rQIrer/4DI5VSFIpsTTQM+TQOsTE3IcvumY9HTbspL/QcplmWivVjc8qyr3H4ATYM25WA8wNw1oKcTMHcxqM8jLXtud7V6vrp9F22+BbfGOLt0TI9mxjKgL5rzGu743bo21L/15eU5Mg2ylvN/6GmL9gdHX8ctf9W+TIPX9R9Q/dqVrWBqQKosr/rxRXu+unqu9HFn+gv79DxyG+QujzuS+sUtLbKbJNJ6Ktxxzy8ptLt+1Ssm9HJoqqu7Sr8OyqaOdlyjpronXvuOBgMWF/NLOrt11Psz1Pwp6iTJJmrTc+l6k2PnlPrBc0aoZkzV1Gy6w1f70LdrusuWd516hWRaXqa9sqrGvmhMg0Q7ppI+bM95oo7F1G2i7wNwFn69K0ouo75X6ZLLyIF5+v7kQGA54DRUN1UkTN1msutCprWyqsa
ewElWNuoHTp/7QqZdLzNywKXe3LxxXyPe2ttod0foxyu7omSXgzwWvdzJrir1o+vWBerWlah3Y5Rp+SKbt91acCIRrntHUnnpVj7VIG+31hpTl4x8mkP+iMtWVVPXm9peqK5Yty5DPX1LqjpfmGgq33rXndyWunb1LtqO9ZqCbkKibdHSjyfSa9CteyzUCz7d6j69/lJdWrJrS66vn2dTWvTuoXB1MBD6CUa37ap5guS53LivEWtuKg5KqywvsTAF76GWi/RdTPHEeVhCUPOcqIGh6s2mqsDJuT3059JlVK6mZZYvlZJzRqhlLz0/255fRaf2p5p05XZVmuSy+twQZVM7n/03LS+DAtPMvvKY1Iu6VHFVx6LmdjFR+1igXfh6E7VaBuicv8K0HTW3h34ssonfNP+BW55FwvRCTFNaVf7LOWHkOVF5pXMrM+oHWN+/LBMb9zXZfd6yK0Z2RckfRrktNTeGepGfWlbeobqVHbeXhMp5WORcMHKOD+e06dG3fLnllyk9qitMzjkizS/pmAPENDO0+l4+MVE2dbRx1md5ngG4ljlZjt3Sq5d/vQzJc6meFNH3pddT+rbUda7qCDmFvKK34rrlUSjRXoN6PSHJrrG01JSg45XpMz2RJ+eL0cm60C0t8lhC1amRlAO3PFLb1V/0qeZAkfWlXLerc6Ooa1TR81cu19OTxgFsYQlJvyMAzHNu6M2F4Zqe5Uy0cqKncBd/JE36Kk1yoGOkTZ9qH+EGlgFwbD/UAORwwjV3660BpvX0riQg+I4wHt1LkaRV5aEcpKwudFmWrlu1OahFJNJ0mrr81B2y/vhqJH3rehOzHJPhNmeM3I4cYKrnCdDZ8qTP8aFX2tGUoUi7d+aXFDoGvBvLu3h8M1QLQKhrQJbBcAPm5d+Rlim1fz19at1oy3eoOkJnGkTuNueN6Q4/ntegLMfq+gI66+U/nJ4d1/QgQzxmZo22Dta5HXek242mvERCr1v1l8e6/e4lCgMWF25jE2TlHqrpOFThUc23sjk2kkLgtl3ZRCfTaXpiRF8+1DGbBpYB5seFQ43lCcfUhCqP021AsCR/MOS6keRZPNOq/6jJH0/TWCUl0nSG2r99l2WYj8J0vHJbsrUgVHO6aTvqrk9Pk9p22elgwDT4UeULgKDAJ5Rw+aW2pQ94l+S5crve9WMOdQ2ouZDkXD6mHxu9zIUrU3paZfrUE2XRlO9QdYRKB9A5v4w6t3IQ+bzLzncsq/6tt87IfIvHNShf26H2qfZrmpdIpVtdD10VTR1smjfLrRxEk0eRlJdI6Q+YmKYSkL97iegGkjgPi0GoOxz1uducG5FuM9zz/92Vbrd9uC0jB5ap+RminQMhUpHM3WIKIPV+4kjOR1eZ0mrKQ7c5F7orr6KdL0dfB0DU6wPOO185wFwPXPTADohtbo5oj0umx9QKFOl141YO9R/LUC18kaRX5n1XrmtduG0pbufKbb4jfZ3uEOp8KPo8RG4PDvRkGkN93hWxXOsmeh7KugnomfPLeVi6IJK7t2gHcMpt6k27oeZYiEe6Q83joJpKAfOkQ6YBYG7P8ssX/0UrVHO8arY0tWzpc0/E2jXlxtQdJZvT5f5M+S9fWqYGMstyEK+8UnfKpjxU866Yxs3oc1QAkQ0ODEUfiLrEUFHr86wosbZ86UJ156ryo7+4MqjsixcW6uk2NdvLQdbhWjMlNVut6dwtr97jmEPDraU00i6PUHWbmtVaPsItv/vkyAnHfEeyZc5tEHk8mdJuyn/1gytfSKkPsA0n0sHQkaRRpjPSch1u/7H8BrlR51c9YGK6kYpkzqWewIDFINxF7zaPRagC4zZWobP5MPyrv2NNdyTzOOhpd0vnlIJs18FqsQrXHC8HGssBdZHOWdMVeneU3pokm/1N+aI+k3dD+g9ONMLd4Zvmm5AvHpT7NM2xAwQ394dLp1trl54H4T5T4t3SKP+W5Ue93C2au2G3sh/qcd1wx6O6ZCKdK0R
u09RaFEqofDcFtG7f6cccat14cUu7npbVsyc7rrdYWlbcuqHD5XW8ynW4/QOh57qK1pqbio11lNpmdwejkWLAEoWuDh6L5+CzaJj2EU2w0p3pjGWwaecjv5HNWdMVct9u8+eEqzDCDeaMlFteqXSpuV/U9k2vLVDrmIIVvXskkoF2poGo8uVtodbtDpGUp2gH0Ueyz1jPrxzb43buQl0X3dHVEGm643WHH++0uM1sG41E53Wo/QOh57qKtW7xyvkMhQFLFLrS5bLgdJOuvr7s7jANgo3HyHYguoFa8R6oaiJfSheqOf6tvY0htxNuzhq5r2ibd/VtVlZFPqBVbj9U61E03M6LnFdC3ZnJgbN6M74+qNaty0a+3sBEzYVjGgeiAiI1FXs8hDuPb+1thM9n7mtX88noL0mUgVmsc4uEOr+RpFlfVz93UjTXcaSivT5iKdNyH6ZXn8gntaKp8/S0uM1sGypt4fJDPyddqf+izQdTN6Y+z1CkvyFu51kOEpbXRjR51lMYsEShK10ubuvL7g63SiFewjVdK93ZXK/I4w7XHC/vKlT61bga03TcehNmrM27UizN/vFuqXI7L/rnevO42zGo76J5vYGk5sLR35Lr1mrQVeHOY2c3nXOuFbcZT4HOIKurc4u4nd9Iyp5c1+3cSZFex5GK5vqItUzLfZhefVI2dXTUdZ4pcHIrd9Fcb3p+yDrHVM9HI9p80M+1qest0t8Q03l2DqjOdnznxaCFAUscdKX5MB5Nj5HeIXmp2S/S455f0jkgTA2wlXNHqEG+ofIA6HxBHOB8QaBb645pLomuDLQ2HXu8WoAiTWe8z788h6YXJKq/4zVYL5IyI8d/qDts/Y5bnz021gGMkZxf0/tXTOU83MBbua3uPI+h0hjpMYfbh97daGqhi0THIP9Bjr+dwUtjRC2FodJqqnPildemfJAvs1XHIc/1tU9uMo4nUtvsmE0927EPmV5ZD6q6Ve3bdKPihYG2Eh9rjiN1kavCFUvTclfWDdVaAcBYsfd037cu0uNWd8my4jCNI4kkD+Q2TDP5hmv1iXfeRbrfaLfTU+e/K2W3O/anl5VQx9zd5zbaNOtdGeHKebKdR30fALq0v3hdOybh6pyuCJUPQOj3WbntX7aW6MuZ6oKeul4jwceaE6ArTbVdXRcI/XK0eHZNxFMkx63ukuUrDdz6p8PdJerNu/IuNtKKL955F49WtkjS2V3HEO8uiq7uz/RiukiCFbVtIP7XRag0RzrwtrvT2xPnUd8HENtrGeT2gPgPjo2mzolFqHxQcwYBkb2I07RN/Vjc6sGeuF7jiQFLHHWlqbarzbzywjW9HE0NyJLblc1+8RzgG41IjlsNapYvZ5PLmWaBXfq3GgQsOJpw1b5UJTR8YIYxv/R96033ppc1qu3Hmoehzl8kws2vof4t068fQ6zp7+muxnD7W17tnD9IztQcj+6NeKdZ78qQZUAOvO2u9KoyASAojWq78aoX9HwAuj7vT1evHZNI65xoyGvPlA9qXieg8wWz0byIU9+mW354aWhAtBiwxIlbUy0QPhLvyrqS2x2SPgeM3K6pNaanRHrcalBzJPmj32XoU47LKafV6xFC3WnoFXU8BvC66codbqTza+jpd2syjlS8ym689qd/r7e0SD0xuDySNMt06GVAdll2V3plmTClMV71gqnVF4h+3h+TeLcORVPnRLNNNS7GNJNsUf4gxxQDO/YfdZ0XRedWxvT1e/p6jTcGLHHQlabaeDbzhoucu6v51CSSmRr1F9JFmr5Q+WO6c1HLyWClbGrnC/6ieXljd+ZhvO98wr00Ur2mPtb092SXSiT7M020ZnqRZ0/qavkNVQbiOVi7u7l1Uct/ywGo0ZabeF47XS3X+nmRf/9hW53dzSe3p+qmyfmDHfkQyfGEu35la1m0x+W1MsaAJQ660lQbr2beSCPn7mg+NQnXEhHpS9uiyZ9QeQDAEayov1WTeyQvWNT3Hc887I4
7n3DHtLn2sD27ZSzp78kulUj2Z5rTRP2tBlD2dPN3vMqvKc3xau3TX+qov8Ay3q8NiXXeHzfxvna6Wq718yJbVtT7pjbua8Im8aJB+QSb6p6LdF4UPb36+VcvAo3lRZnd2aIcE6uXaG5utgBYzc3NiU5Kj1v2txrrvDtetpb9rSaizy3LsgrvfNU6746XrcI7X+2xdIVKT7z3pX+u/iu889WY8sskXnkYr/REsm35d0+UgUTozvzsLrGmOd7XWLKVCa+ea33/P3pyk3XeHS9bP3pyk2VZlpVf/op13h0vW/nlr4RcL9zn4fYb7fqRbLM78jbS32+2sCSxSGeLNc1+2BODrnqqNQdwvyvStfsD9kscu9IyEM887I6WinCzdcbz5Wle05MtP/FqMo/H/CZdvcaScTBmT7fyRcr9umtyjKvyW84HIbp6PN2RHz1Zj4fDeViSWDSP4rp9111jWKR4vQY9WqaBg6ZHfGPdbk/mYbT0NKlzoCpJIHiApZfSnyxiuQa7Q1evsWQo08lIPy8yWNlXMT2p8rk763HOw3IGiHYAaE8PklT7TMRdmx6syH1WVsX+csRE5GEs9MGobsGKvqz8m8KL9hrsDl29xpKlTCcb/bx88+H1ri0rXs9nr7S+MWBJMnoTtCzwy/62xzjfhNLTzaeJfIROTdutvyhM/Vu+HDHa7crBg/Ge08RNLF0P80sKHU9CqblnPjlyImgbiW5CT7SudO0kssk8HteYV7tVkpnboO+8gRl4847LjOcpXvkc7yd7vPQoNAOWJGMatT2/pNAOVkzzTSg9Ne8EkPi7tnDHGuu+5XbjPadJKLGM1u94fNw5W+cGl8pS//eZpqtPQ8wv6dnZfvW0deUa68l64Uxg+oGPx0sZIxXPJ3sSXY/rGLAkGVNhuW7VZtemxlBCReLXPrkJAIxvB40kSj8T7tp6sjsgln11x2ydvVVXz2UimszPhGvMS8K1XLy1txGXnp8ddF7UdThx5CD7Za1A952neNZLXitjDFiSkKkbSH9pmlzOTahIfHPtYfvfsUTpZ8pdW092B0S7r+6YrbM3i/VcJqrJ/Ey5xrwiXMvFlILBxqBAXYeT84Pf9Nxd5yle9ZLXyhgDliSldwOpGTyjaa4LF4mH+o4VYqee7A6IdF9ea8pNFtGeS+bzmSOSlgs9WE1knZmIbsruxoAlTnp6CuNrn9wUshtocv6giJrrIonEI4nSvTaFc7S6kv6e7A6IdF9ea8pNFtGeS+bzmSVcfdmVlo3uGCzrhSd74ikl0QnoLVRzoXpng6IibDXdcjzILptbSwvtd08sr94jmiezIy7c80sK7UJtetGW23dSTx5/d4g1/fIOquaByx3nIt6i2deCEJXk/JJCTwePiRLLuWQ+n3nC1YmR1pm6eNahPVkv9SS2sMRJVwY6RRNZm7ps9BeHRdv8GCoSjzRK78kBqKHEepcSS/rV95PzB7luB0BcWpfOxK6H7mi1c9umPJdnSv5SbMLVibG2bMSrDu3NdQUDljiKtTkwmsfQTE3QlVU1SEvtaCyTFW4kQvXBqm1HOpiwJweguunKI33Rpl9/aZzahlpevpG6q87ErofuePGa2zY37msEAEwpyHYs35vzl6IX7ZiVaAdgx6MO7c11BQOWOItloFM0kbVpcjC5P9NjyG4iicSjjdITPdCrq3cp0aRfPxd6vqhgJR554LXR+j2hO1rt3LYZ6lz11vyl6ISrL9UkjV1t2ehqHdqb6woGLDEK1bSspkGPtTkw2kcqYx1YFSoSV3ec0UbpXhjo1ZW7lEjT35UZhylysZ7LUN1JQEdLZLIOJvdims4E4Vou3trb9ZeqAj1fhyZTeWLAEiNT07Js/ru1tPsj6642P4YqhKFaaty27aUpnGO5S4km/V2ZcZiiE8u5jKQ7acf+oxEPJg+1nZ7mxTSdCboye3as4096og5NpvIUU8Dy6KOP4pFHHkFDQwPGjh2LFStWYNKkScZlT548iYq
KCjz99NOor6/HV7/6VTz88MP47ne/ay/j9/tx77334tlnn0VDQwNyc3Px4x//GIsWLYLP582nS/SmPvlv0xgTuY6baCJrrw2s8mJ6orlLiTb9pu9inXGYQovljjNcdxKApBpM7vU0Udclqg5NpvIUdcCydu1alJWV4fHHH0dRURGWLl2KadOmYffu3RgyZEjQ8osWLcKzzz6LVatWYcyYMfjLX/6Cq6++Ghs3bsTFF18MAHj44Yfx2GOP4emnn8aFF16Ibdu2YdasWcjKysL8+fO7fpTdRJ5o9cSZfoIjbQ6MNrL22sCqrqTHCy/riiX9pm6gWGYcJnddueN0605SnyXTYPJY0hSv6yqZugyUZExzIut0L5ZxIytKkyZNsubOnWv/7ff7rdzcXKuiosK4/LnnnmutXLnS8dn3v/996/rrr7f/vuKKK6wbb7wx5DLhNDc3WwCs5ubmiNeJl8I7X7XOu+Nlq/DOV2Naf9nfaqzz7njZWva3mog+723iefyJyMv88les8+542covf6XH9nkmiNe5lNdnV7bZ1eu8O4RKU7zyLxnrp2RMsxckqoxH+vsdVQtLe3s7tm/fjoULF9qfpaSkoLS0FJs2bTKu09bWhvT0dMdnGRkZ2LBhg/33lClT8OSTT6KmpgajR4/GP//5T2zYsAGVlZXRJC8h4jFAymutJT0tnk2SPZ2Xy6v3uHYDnSnnr7vE41zq1+fGfbENjPTCYPJo0xSv6yqZugyUZExzonmxjAeJJgqqr6+3AFgbN250fP7zn//cmjRpknGdH/3oR9YFF1xg1dTUWH6/3/rrX/9qZWRkWGlpafYyfr/fuuOOOyyfz2f16dPH8vl81oMPPhgyLa2trVZzc7P9X11dXY+3sOjROqP3rlH5p6J8r+cjz7+3xev8ePE8R5OmeF1XyXZ9WlZypjkREl3Gu6WFJRbLli3D7NmzMWbMGPh8PhQUFGDWrFl46qmn7GWee+45/P73v8fq1atx4YUXYteuXbjtttuQm5uLmTNnGrdbUVGB++67r7uT78prg0x7g1ieBkkUnn9vi9f58eJ5jmWAeDyuq2S6PpVkTHNP82IZdxNVwJKdnY3U1FQcPHjQ8fnBgwcxdOhQ4zrnnHMOXnrpJbS2tqKpqQm5ubkoLy9Hfn6+vczPf/5zlJeX49prrwUAfP3rX8f+/ftRUVHhGrAsXLgQZWVl9t8tLS3Iy8uL5nC65EzvxukOSdEkeRrPv7fF6/x48TxHm6Z4XVfJdH0qyZjmnubFMu4q2qabSZMmWfPmzbP/9vv91rBhw1wH3era29utgoICa+HChfZngwYNsn796187lnvwwQetwsLCiNOVyEG31HWJbpIk6o16c7dYOMmY5jNVt3UJlZWVYebMmZgwYQImTZqEpUuX4vjx45g1axYA4IYbbsCwYcNQUVEBANiyZQvq6+sxbtw41NfX495770UgEMDtt99ub/PKK6/EAw88gBEjRuDCCy/Ezp07UVlZiRtvvDEuQRl5WzI1SRIli97cLRZOMqaZwos6YJkxYwY+//xzLF68GA0NDRg3bhzWrVuHnJwcAMCBAweQkpJiL9/a2opFixahtrYW/fr1w/Tp0/HMM89gwIAB9jIrVqzA3XffjZ/+9Kc4dOgQcnNz8T//8z9YvHhx14+QPC+pmiSJkkRv7hYLJxnTTOH5LMvqFWeupaUFWVlZaG5uRmZmZqKTQ0RERBGI9Peb7xJKAsk4a2NvxvNBRIniVv8sqarB1o8PY+LIQUH1T2+pl1LCL0KJpl5Otbx6j+Nz1U+bmuLN9y31VjwfRJQobvXP1o8PY+O+Jmz9+LDj895UL7GFJQlw1kZv4fkgokRxq3827mvClILB2LivyX58u7fVSxzDkkRU4VNzCvSWQpiseD6IKFHc6p9krJci/f1ml1ASmV9SaBdCztqYeDwfRJQobvVPb66XGLAkEdOsjZQ4PB9EpFtiGF+iLK/egyWnu3K6yq3+6c3
1EsewJAm9L1L9DXACpETg+SAiEzUoFnDWBbLO6Cq3+mdzbRM27mvqtfUSA5YkwFkbvYXng4jcdPegfLf6RwUrUwoG99p6iQFLEuCsjd7C80HhcK6eM5sMFNTbouM1+NWt/pk4cpDj/3paekO9xKeEiIjizO2Ourc9Zkqhjb7rNXs8Sc0Dlyc6OZ7FmW6JiBKEc/WQafArz3vXMGAhIuoG3dktQN7GQfndgwELEVE3mV9SaAcrvW1ODDLjoPzuw4CFiKibsFvgzMNB+d2HAQsRUTdgt8CZKdTTXzzvXcOAhYgoztgtQBR/DFiIiOKM3QJE8cd5WIiIiChh+LZmIiIi6jUYsBAREZHnMWAhIiIiz2PAQkRERJ7HgIWIiIg8jwELEREReR4DFiIiIvI8BixERETkeQxYiIiIyPMYsBAREZHnMWAhIiIiz2PAQnQGWFJVg+XVe4zfLa/egyWn3yBMRORVDFiIzgCpKT5UGoKW5dV7UFlVg9QUX4JSRkQUmT6JTgARdb/5JYUAgMrTLSnzSwrtYKVs6mj7eyIir2LAQnSGkEHLyvV70e4PMFghoqTBLiGiM8j8kkKkpaag3R9AWmoKgxUiShoMWIjOIMur99jBSrs/4DoQl4jIa9glRHSG0MesqL8BsKWFiDyPAQvZlpx+WsT047W8eg/8AQsLpo5OQMqoq0wDbE0DcYmIvIoBC9nUo6+A88dL/thRcvIHLOMAW/W3P2AlIllERBFjwEI2Pvrae4VqGeN5JaJkwICFHPjoK50J2P1JlHz4lBAF4aOv1Ntx5l+i5MMWFgpievSVQQv1Juz+JEo+MbWwPProoxg5ciTS09NRVFSEt99+23XZkydP4v7770dBQQHS09MxduxYrFu3Lmi5+vp6/Nd//RcGDx6MjIwMfP3rX8e2bdtiSR51gay0ax64HGVTRxvvRImS3fySQrt8j77rNQYrRB4XdcCydu1alJWV4Z577sGOHTswduxYTJs2DYcOHTIuv2jRIjzxxBNYsWIF3n//fcyZMwdXX301du7caS9z5MgRXHrppTjrrLPw2muv4f3338evfvUrDBw4MPYjo6i5PfrKoIV6K3Z/EiUPn2VZUT3PWFRUhIkTJ2LlypUAgEAggLy8PNxyyy0oLy8PWj43Nxd33XUX5s6da392zTXXICMjA88++ywAoLy8HG+99RbefPPNmA+kpaUFWVlZaG5uRmZmZszbOZNxICKdaVSQroIWtrAQ9bxIf7+jamFpb2/H9u3bUVpa2rmBlBSUlpZi06ZNxnXa2tqQnp7u+CwjIwMbNmyw//7zn/+MCRMm4Ac/+AGGDBmCiy++GKtWrQqZlra2NrS0tDj+o65ZEKKynl9SyGCFehV2fxIll6gClsbGRvj9fuTk5Dg+z8nJQUNDg3GdadOmobKyEnv27EEgEEBVVRVeeOEFfPbZZ/YytbW1eOyxx1BYWIi//OUvuPnmmzF//nw8/fTTrmmpqKhAVlaW/V9eXl40h0JEZzB2fxIln25/rHnZsmUoLCzEmDFjkJaWhnnz5mHWrFlISencdSAQwCWXXIIHH3wQF198MW666SbMnj0bjz/+uOt2Fy5ciObmZvu/urq67j4UIuolQs38WzZ1NGf+JfKgqB5rzs7ORmpqKg4ePOj4/ODBgxg6dKhxnXPOOQcvvfQSWltb0dTUhNzcXJSXlyM/P99e5txzz8UFF1zgWO9rX/sann/+ede09O3bF3379o0m+UREADjzL1EyiqqFJS0tDePHj0d1dbX9WSAQQHV1NYqLi0Oum56ejmHDhuHUqVN4/vnncdVVV9nfXXrppdi9e7dj+ZqaGpx33nnRJI/Is5aE6GZYXr0HS07PB0KJx3NF5E1RdwmVlZVh1apVePrpp/HBBx/g5ptvxvHjxzFr1iwAwA033ICFCxfay2/ZsgUvvPACamtr8eabb+K73/0uAoEAbr/9dnuZBQsWYPPmzXjwwQexd+9erF69Gk8++aT
jySKiZMaZVZMHzxWRN0U90+2MGTPw+eefY/HixWhoaMC4ceOwbt06eyDugQMHHONTWltbsWjRItTW1qJfv36YPn06nnnmGQwYMMBeZuLEiXjxxRexcOFC3H///Rg1ahSWLl2K66+/vutHSOQBnFk1efBcEXlT1POweBXnYaFkwHk/kgfPFVHP6JZ5WIioazizavLguSLyFgYsRD3I9GJJ8iaeKyJv4duaiXqIPg5C/Q3wUVqv4bki8h4GLEQ9wG1mVQD8IfQYnisib2LAQtQDQs2sqr4nb+C5IvImPiVERERECcOnhIiIiKjXYMBCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEi6uWWVNVgefUe43fLq/dgSVVND6eIKHoMWIiIernUFB8qDUHL8uo9qKyqQWqKL0EpI4pcn0QngIiIutf8kkIAQOXplpT5JYV2sFI2dbT9PZGXxdTC8uijj2LkyJFIT09HUVER3n77bddlT548ifvvvx8FBQVIT0/H2LFjsW7dOtflH3roIfh8Ptx2222xJI2IiAzmlxSibOpoVFbVYPRdrzFYoaQTdcCydu1alJWV4Z577sGOHTswduxYTJs2DYcOHTIuv2jRIjzxxBNYsWIF3n//fcyZMwdXX301du7cGbTs1q1b8cQTT+Ab3/hG9EdCREQhzS8pRFpqCtr9AaSlpjBYoaQSdcBSWVmJ2bNnY9asWbjgggvw+OOP4+yzz8ZTTz1lXP6ZZ57BnXfeienTpyM/Px8333wzpk+fjl/96leO5b744gtcf/31WLVqFQYOHBjb0RARkavl1XvsYKXdH3AdiEvkRVEFLO3t7di+fTtKS0s7N5CSgtLSUmzatMm4TltbG9LT0x2fZWRkYMOGDY7P5s6diyuuuMKx7VDa2trQ0tLi+I+IiMzkmJWaBy63u4cYtFCyiGrQbWNjI/x+P3Jychyf5+Tk4MMPPzSuM23aNFRWVuJb3/oWCgoKUF1djRdeeAF+v99eZs2aNdixYwe2bt0acVoqKipw3333RZN8IqIzkmmArWkgLpGXdftjzcuWLUNhYSHGjBmDtLQ0zJs3D7NmzUJKSseu6+rqcOutt+L3v/99UEtMKAsXLkRzc7P9X11dXXcdAhFRUvMHLOMAWzUQ1x+wEpQyoshF1cKSnZ2N1NRUHDx40PH5wYMHMXToUOM655xzDl566SW0traiqakJubm5KC8vR35+PgBg+/btOHToEC655BJ7Hb/fj3/84x9YuXIl2trakJqaGrTdvn37om/fvtEkn4jojLRg6mjX79iyQskiqhaWtLQ0jB8/HtXV1fZngUAA1dXVKC4uDrlueno6hg0bhlOnTuH555/HVVddBQAoKSnBO++8g127dtn/TZgwAddffz127dplDFaIiIjozBL1xHFlZWWYOXMmJkyYgEmTJmHp0qU4fvw4Zs2aBQC44YYbMGzYMFRUVAAAtmzZgvr6eowbNw719fW49957EQgEcPvttwMA+vfvj4suusixj6985SsYPHhw0OdERNQ1S07PbGtqWVlevQf+gBWyRYYoUaIOWGbMmIHPP/8cixcvRkNDA8aNG4d169bZA3EPHDhgj08BgNbWVixatAi1tbXo168fpk+fjmeeeQYDBgyI20EQEVFk1DT9gLM7SA7MJfIin2VZvWK0VUtLC7KystDc3IzMzMxEJ4eIyLP0p4Y4TT8lUqS/33yXEBHRGUY+0rxy/V60+wM
MVsjz+LZmIqIzEKfpp2TDgIWI6AzEafop2bBLiIjoDOM2hgXgvCzkXQxYiIjOIJymn5IVAxYiojNIqGn61fdEXsTHmomIiChhIv395qBbIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5MQUsjz76KEaOHIn09HQUFRXh7bffdl325MmTuP/++1FQUID09HSMHTsW69atcyxTUVGBiRMnon///hgyZAi+973vYffu3bEkjYiIiHqhqAOWtWvXoqysDPfccw927NiBsWPHYtq0aTh06JBx+UWLFuGJJ57AihUr8P7772POnDm4+uqrsXPnTnuZN954A3PnzsXmzZtRVVWFkydP4jvf+Q6OHz8e+5ERERFRr+GzLMuKZoWioiJMnDgRK1euBAAEAgHk5eXhlltuQXl5edDyubm5uOuuuzB37lz7s2uuuQYZGRl49tlnjfv4/PPPMWTIELzxxhv41re+FVG6WlpakJWVhebmZmRmZkZzSERERJQgkf5+R9XC0t7eju3bt6O0tLRzAykpKC0txaZNm4zrtLW1IT093fFZRkYGNmzY4Lqf5uZmAMCgQYNcl2lra0NLS4vjPyIiIuqdogpYGhsb4ff7kZOT4/g8JycHDQ0NxnWmTZuGyspK7NmzB4FAAFVVVXjhhRfw2WefGZcPBAK47bbbcOmll+Kiiy5yTUtFRQWysrLs//Ly8qI5FCIiIkoi3f6U0LJly1BYWIgxY8YgLS0N8+bNw6xZs5CSYt713Llz8e6772LNmjUht7tw4UI0Nzfb/9XV1XVH8omIiMgDogpYsrOzkZqaioMHDzo+P3jwIIYOHWpc55xzzsFLL72E48ePY//+/fjwww/Rr18/5OfnBy07b948vPzyy/j73/+O4cOHh0xL3759kZmZ6fiPiIiIeqeoApa0tDSMHz8e1dXV9meBQADV1dUoLi4OuW56ejqGDRuGU6dO4fnnn8dVV11lf2dZFubNm4cXX3wR69evx6hRo6I8DCIiIurN+kS7QllZGWbOnIkJEyZg0qRJWLp0KY4fP45Zs2YBAG644QYMGzYMFRUVAIAtW7agvr4e48aNQ319Pe69914EAgHcfvvt9jbnzp2L1atX409/+hP69+9vj4fJyspCRkZGPI6TiIiIkljUAcuMGTPw+eefY/HixWhoaMC4ceOwbt06eyDugQMHHONTWltbsWjRItTW1qJfv36YPn06nnnmGQwYMMBe5rHHHgMAfPvb33bs67e//S1+/OMfR39URERE1KtEPQ+LV3EeFiIiouTTLfOwEBERESUCAxYiIiLyPAYsRERE5HkMWIiIiMjzGLAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BCxEREXkeAxYiIiLyPAYsRERE5HkMWIiIiMjzGLAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BCxEREXkeAxYiIiLyPAYsRERE5HkMWIiIiMjzGLAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BCxEREXkeAxYiIiLyPAYsRERE5HkMWIiIiMjzGLAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BCxEREXkeAxYiIiLyPAYsRERE5HkMWIiIiMjzGLAQESWZJVU1WF69x/jd8uo9WFJV08MpIup+DFiIiJJMaooPlYagZXn1HlRW1SA1xZeglBF1nz6JTgAREUV
nfkkhAKDydEvK/JJCO1gpmzra/p6oN2HAQkSUhGTQsnL9XrT7AwxWqFdjlxARUZKaX1KItNQUtPsDSEtNYbBCvVpMAcujjz6KkSNHIj09HUVFRXj77bddlz158iTuv/9+FBQUID09HWPHjsW6deu6tE0iIuoYs6KClXZ/wHUgLlFvEHXAsnbtWpSVleGee+7Bjh07MHbsWEybNg2HDh0yLr9o0SI88cQTWLFiBd5//33MmTMHV199NXbu3BnzNomIznRyzErNA5ejbOpo40Bcot7CZ1mWFc0KRUVFmDhxIlauXAkACAQCyMvLwy233ILy8vKg5XNzc3HXXXdh7ty59mfXXHMNMjIy8Oyzz8a0TZOWlhZkZWWhubkZmZmZ0RwSEVFScRtgy4G3lIwi/f2OatBte3s7tm/fjoULF9qfpaSkoLS0FJs2bTKu09bWhvT0dMdnGRkZ2LBhQ8zbVNtta2uz/25paYnmUIiIkpY/YBmDEvW3PxDVfShRUogqYGlsbITf70dOTo7j85ycHHz44YfGdaZNm4bKykp861vfQkFBAaqrq/HCCy/A7/fHvE0AqKiowH333RdN8omIeoUFU0e7fseWFeqtuv0poWXLlqGwsBBjxoxBWloa5s2bh1mzZiElpWu7XrhwIZqbm+3/6urq4pRiIiIi8pqooobs7Gykpqbi4MGDjs8PHjyIoUOHGtc555xz8NJLL+H48ePYv38/PvzwQ/Tr1w/5+fkxbxMA+vbti8zMTMd/RERE1DtFFbCkpaVh/PjxqK6utj8LBAKorq5GcXFxyHXT09MxbNgwnDp1Cs8//zyuuuqqLm+TiIiIzgxRz3RbVlaGmTNnYsKECZg0aRKWLl2K48ePY9asWQCAG264AcOGDUNFRQUAYMuWLaivr8e4ceNQX1+Pe++9F4FAALfffnvE2yQiIqIzW9QBy4wZM/D5559j8eLFaGhowLhx47Bu3Tp70OyBAwcc41NaW1uxaNEi1NbWol+/fpg+fTqeeeYZDBgwIOJtEhER0Zkt6nlYvIrzsBARESWfSH+/+S4hIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8ryYApZHH30UI0eORHp6OoqKivD222+HXH7p0qX46le/ioyMDOTl5WHBggVobW21v/f7/bj77rsxatQoZGRkoKCgAL/4xS9gWVYsySMiIqJepk+0K6xduxZlZWV4/PHHUVRUhKVLl2LatGnYvXs3hgwZErT86tWrUV5ejqeeegpTpkxBTU0NfvzjH8Pn86GyshIA8PDDD+Oxxx7D008/jQsvvBDbtm3DrFmzkJWVhfnz53f9KImIiCip+awomzGKioowceJErFy5EgAQCASQl5eHW265BeXl5UHLz5s3Dx988AGqq6vtz372s59hy5Yt2LBhAwDgP/7jP5CTk4P//d//tZe55pprkJGRgWeffTaidLW0tCArKwvNzc3IzMyM5pCIiIgoQSL9/Y6qS6i9vR3bt29HaWlp5wZSUlBaWopNmzYZ15kyZQq2b99udxvV1tbi1VdfxfTp0x3LVFdXo6amBgDwz3/+Exs2bMDll1/umpa2tja0tLQ4/iMiIqLeKaouocbGRvj9fuTk5Dg+z8nJwYcffmhc57rrrkNjYyP+7d/
+DZZl4dSpU5gzZw7uvPNOe5ny8nK0tLRgzJgxSE1Nhd/vxwMPPIDrr7/eNS0VFRW47777okk+ERERJaluf0ro9ddfx4MPPohf//rX2LFjB1544QW88sor+MUvfmEv89xzz+H3v/89Vq9ejR07duDpp5/GL3/5Szz99NOu2124cCGam5vt/+rq6rr7UIiIiChBomphyc7ORmpqKg4ePOj4/ODBgxg6dKhxnbvvvhv//d//jZ/85CcAgK9//es4fvw4brrpJtx1111ISUnBz3/+c5SXl+Paa6+1l9m/fz8qKiowc+ZM43b79u2Lvn37RpN8IiIiSlJRtbCkpaVh/PjxjgG0gUAA1dXVKC4uNq7z5ZdfIiXFuZvU1FQAsB9bdlsmEAhEkzwiIiLqpaJ+rLmsrAwzZ87EhAkTMGnSJCxduhTHjx/HrFmzAAA33HADhg0bhoqKCgDAlVdeicrKSlx88cUoKirC3r17cffdd+PKK6+0A5crr7wSDzzwAEaMGIELL7wQO3fuRGVlJW688cY4HioRERElq6gDlhkzZuDzzz/H4sWL0dDQgHHjxmHdunX2QNwDBw44WksWLVoEn8+HRYsWob6+Huecc44doCgrVqzA3XffjZ/+9Kc4dOgQcnNz8T//8z9YvHhxHA6RiIiIkl3U87B4FedhISIiSj7dMg8LERERUSIwYCEiIiLPY8BCREREnseAhYiIiDyPAQsRERF5HgMWIiIi8jwGLEREROR5DFiIiIjI8xiwEBERkecxYCEiIiLPY8BCREREnseAhYiIiDyPAQuRsKSqBsur9xi/W169B0uqano4RUREBDBgIXJITfGh0hC0LK/eg8qqGqSm+BKUMiKiM1ufRCeAyEvmlxQCACpPt6TMLym0g5WyqaPt74mIqGcxYCHSyKBl5fq9aPcHGKwQESUYu4SIDOaXFCItNQXt/gDSUlMYrBARJRgDFiKD5dV77GCl3R9wHYhLREQ9g11CRBp9zIr6GwBbWoiIEoQBC5FgGmBrGohLREQ9iwELkeAPWMYBtupvf8BKRLKIiM54PsuyekUN3NLSgqysLDQ3NyMzMzPRySEiIqIIRPr7zUG3RERE5HkMWIiIiMjzGLAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BCxEREXkeAxYiIiLyPAYsRERE5HkMWIiIiMjzes27hNQbBlpaWhKcEiIiIoqU+t0O96agXhOwHDt2DACQl5eX4JQQERFRtI4dO4asrCzX73vNyw8DgQA+/fRT9O/fHz6fL27bbWlpQV5eHurq6vhSxW7GvO4ZzOeewXzuOczrntFd+WxZFo4dO4bc3FykpLiPVOk1LSwpKSkYPnx4t20/MzOTF0IPYV73DOZzz2A+9xzmdc/ojnwO1bKicNAtEREReR4DFiIiIvI8Bixh9O3bF/fccw/69u2b6KT0eszrnsF87hnM557DvO4Zic7nXjPoloiIiHovtrAQERGR5zFgISIiIs9jwEJERESex4CFiIiIPI8BSxiPPvooRo4cifT0dBQVFeHtt99OdJKSyj/+8Q9ceeWVyM3Nhc/nw0svveT43rIsLF68GOeeey4yMjJQWlqKPXv2OJY5fPgwrr/+emRmZmLAgAH4f//v/+GLL77owaPwvoqKCkycOBH9+/fHkCFD8L3vfQ+7d+92LNPa2oq5c+di8ODB6NevH6655hocPHjQscyBAwdwxRVX4Oyzz8aQIUPw85//HKdOnerJQ/G0xx57DN/4xjfsibOKi4vx2muv2d8zj7vHQw89BJ/Ph9tuu83+jHkdH/feey98Pp/jvzFjxtjfeyqfLXK1Zs0aKy0tzXrqqaes9957z5o9e7Y1YMAA6+DBg4lOWtJ49dVXrbvuust64YUXLADWiy++6Pj+oYcesrKysqyXXnrJ+uc//2n953/+pzVq1CjrxIkT9jLf/e53rbFjx1qbN2+23nzzTev888+3fvSjH/XwkXjbtGnTrN/
+9rfWu+++a+3atcuaPn26NWLECOuLL76wl5kzZ46Vl5dnVVdXW9u2bbMmT55sTZkyxf7+1KlT1kUXXWSVlpZaO3futF599VUrOzvbWrhwYSIOyZP+/Oc/W6+88opVU1Nj7d6927rzzjuts846y3r33Xcty2Ied4e3337bGjlypPWNb3zDuvXWW+3Pmdfxcc8991gXXnih9dlnn9n/ff755/b3XspnBiwhTJo0yZo7d679t9/vt3Jzc62KiooEpip56QFLIBCwhg4daj3yyCP2Z0ePHrX69u1r/d///Z9lWZb1/vvvWwCsrVu32su89tprls/ns+rr63ss7cnm0KFDFgDrjTfesCyrI1/POuss6w9/+IO9zAcffGABsDZt2mRZVkdwmZKSYjU0NNjLPPbYY1ZmZqbV1tbWsweQRAYOHGj95je/YR53g2PHjlmFhYVWVVWV9e///u92wMK8jp977rnHGjt2rPE7r+Uzu4RctLe3Y/v27SgtLbU/S0lJQWlpKTZt2pTAlPUeH330ERoaGhx5nJWVhaKiIjuPN23ahAEDBmDChAn2MqWlpUhJScGWLVt6PM3Jorm5GQAwaNAgAMD27dtx8uRJR16PGTMGI0aMcOT117/+deTk5NjLTJs2DS0tLXjvvfd6MPXJwe/3Y82aNTh+/DiKi4uZx91g7ty5uOKKKxx5CrA8x9uePXuQm5uL/Px8XH/99Thw4AAA7+Vzr3n5Ybw1NjbC7/c7TgIA5OTk4MMPP0xQqnqXhoYGADDmsfquoaEBQ4YMcXzfp08fDBo0yF6GnAKBAG677TZceumluOiiiwB05GNaWhoGDBjgWFbPa9O5UN9Rh3feeQfFxcVobW1Fv3798OKLL+KCCy7Arl27mMdxtGbNGuzYsQNbt24N+o7lOX6Kiorwu9/9Dl/96lfx2Wef4b777sM3v/lNvPvuu57LZwYsRL3M3Llz8e6772LDhg2JTkqv9NWvfhW7du1Cc3Mz/vjHP2LmzJl44403Ep2sXqWurg633norqqqqkJ6enujk9GqXX365/e9vfOMbKCoqwnnnnYfnnnsOGRkZCUxZMHYJucjOzkZqamrQaOiDBw9i6NChCUpV76LyMVQeDx06FIcOHXJ8f+rUKRw+fJjnwWDevHl4+eWX8fe//x3Dhw+3Px86dCja29tx9OhRx/J6XpvOhfqOOqSlpeH888/H+PHjUVFRgbFjx2LZsmXM4zjavn07Dh06hEsuuQR9+vRBnz598MYbb2D58uXo06cPcnJymNfdZMCAARg9ejT27t3ruTLNgMVFWloaxo8fj+rqavuzQCCA6upqFBcXJzBlvceoUaMwdOhQRx63tLRgy5Ytdh4XFxfj6NGj2L59u73M+vXrEQgEUFRU1ONp9irLsjBv3jy8+OKLWL9+PUaNGuX4fvz48TjrrLMceb17924cOHDAkdfvvPOOI0CsqqpCZmYmLrjggp45kCQUCATQ1tbGPI6jkpISvPPOO9i1a5f934QJE3D99dfb/2Zed48vvvgC+/btw7nnnuu9Mh3XIby9zJo1a6y+fftav/vd76z333/fuummm6wBAwY4RkNTaMeOHbN27txp7dy50wJgVVZWWjt37rT2799vWVbHY80DBgyw/vSnP1n/+te/rKuuusr4WPPFF19sbdmyxdqwYYNVWFjIx5o1N998s5WVlWW9/vrrjscTv/zyS3uZOXPmWCNGjLDWr19vbdu2zSouLraKi4vt79Xjid/5znesXbt2WevWrbPOOeccPgYqlJeXW2+88Yb10UcfWf/617+s8vJyy+fzWX/9618ty2Iedyf5lJBlMa/j5Wc/+5n1+uuvWx999JH11ltvWaWlpVZ2drZ16NAhy7K8lc8MWMJYsWKFNWLECCstLc2aNGmStXnz5kQnKan8/e9/twAE/Tdz5kzLsjoebb777rutnJwcq2/fvlZJSYm1e/duxzaampqsH/3oR1a/fv2szMxMa9asWda
xY8cScDTeZcpjANZvf/tbe5kTJ05YP/3pT62BAwdaZ599tnX11Vdbn332mWM7H3/8sXX55ZdbGRkZVnZ2tvWzn/3MOnnyZA8fjXfdeOON1nnnnWelpaVZ55xzjlVSUmIHK5bFPO5OesDCvI6PGTNmWOeee66VlpZmDRs2zJoxY4a1d+9e+3sv5bPPsiwrvm02RERERPHFMSxERETkeQxYiIiIyPMYsBAREZHnMWAhIiIiz2PAQkRERJ7HgIWIiIg8jwELEREReR4DFiIiIvI8BixERETkeQxYiIiIyPMYsBAREZHnMWAhIiIiz/v/mnlEtjFNpjsAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(pvalues, \"x\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "42ecee87-31b9-4c52-b867-348569050142", + "metadata": {}, + "outputs": [], + "source": [ + "permute_per_tree = True\n", + "sample_dataset_per_tree = False\n", + "\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "# initialize hypothesis tester\n", + "est = FeatureImportanceForestClassifier(\n", + " RandomForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " ),\n", + " random_state=seed,\n", + " test_size=test_size,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "d2193297-b494-4cc4-8cf9-ea2f209e907b", + "metadata": {}, + "outputs": [], + "source": [ + "stats = []\n", + "pvalues = []\n", + "\n", + "for idx in range(500):\n", + " X = rng.standard_normal(size=(n_samples, n_features))\n", + " y = rng.binomial(1, 0.5, size=n_samples) # .reshape(-1, 1)\n", + " stat, pvalue = est.test(X, y, covariate_index=[0], metric=\"mi\")\n", + " est.reset()\n", + "\n", + " stats.append(stat)\n", + " pvalues.append(pvalue)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "65cfa941-9009-430f-a709-02a781db18f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(pvalues, \"x\")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "2c4c4933-9735-4660-917f-641be99bf48a", + "metadata": {}, + "outputs": [], + "source": [ + "permute_per_tree = False\n", + "sample_dataset_per_tree = True\n", + "\n", + "rng = np.random.default_rng(seed)\n", + "\n", + "# initialize hypothesis tester\n", + "est = FeatureImportanceForestClassifier(\n", + " RandomForestClassifier(\n", + " max_features=1.0,\n", + " random_state=seed,\n", + " n_estimators=n_estimators,\n", + " n_jobs=-1,\n", + " ),\n", + " random_state=seed,\n", + " test_size=test_size,\n", + " permute_per_tree=permute_per_tree,\n", + " sample_dataset_per_tree=sample_dataset_per_tree,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "2beb63bf-77e3-43bc-8259-f6e8a149825d", + "metadata": {}, "outputs": [], + "source": [ + "stats = []\n", + "pvalues = []\n", + "\n", + "for idx in range(500):\n", + " X = rng.standard_normal(size=(n_samples, n_features))\n", + " y = rng.binomial(1, 0.5, size=n_samples) # .reshape(-1, 1)\n", + " stat, pvalue = est.test(X, y, covariate_index=[0], metric=\"mi\")\n", + " est.reset()\n", + "\n", + " stats.append(stat)\n", + " pvalues.append(pvalue)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "54654a10-40ff-44b0-9015-c97d34e00dd9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "plt.plot(pvalues, \"x\")" ] @@ -161,7 +413,7 @@ { "cell_type": "code", "execution_count": null, - "id": "42ecee87-31b9-4c52-b867-348569050142", + "id": "37269289-a50d-4a7d-ae32-cde44e38ae97", "metadata": {}, "outputs": [], "source": [] diff --git a/sktree/conftest.py b/sktree/conftest.py index 7b69b7098..ba40aa1fb 100644 --- a/sktree/conftest.py +++ b/sktree/conftest.py @@ -1,22 +1,10 @@ -# import pytest +import pytest - -# def pytest_addoption(parser): -# parser.addoption( -# "--runslow", action="store_true", default=False, help="run slow tests" -# ) +# With the following global module marker, +# monitoring is disabled by default: +pytestmark = [pytest.mark.monitor_skip_test] def pytest_configure(config): """Set up pytest markers.""" config.addinivalue_line("markers", "slowtest: mark test as slow") - - -# def pytest_collection_modifyitems(config, items): -# if config.getoption("--runslow"): -# # --runslow given in cli: do not skip slow tests -# return -# skip_slow = pytest.mark.skip(reason="need --runslow option to run") -# for item in items: -# if "slow" in item.keywords: -# item.add_marker(skip_slow) diff --git a/sktree/stats/tests/meson.build b/sktree/stats/tests/meson.build index 2f18f5a65..c0b2305ff 100644 --- a/sktree/stats/tests/meson.build +++ b/sktree/stats/tests/meson.build @@ -1,6 +1,7 @@ python_sources = [ '__init__.py', - 'test_forestht.py' + 'test_forestht.py', + 'test_coleman.py' ] py3.install_sources( diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index de7a4eb5e..2ad605229 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -372,3 +372,22 @@ def test_small_dataset(): stat, pvalue = clf.test(X, y, metric="mi") assert stat == 0.0 assert pvalue > 0.05 + + +# @pytest.mark.monitor_test +# def test_memory_usage(): +# n_samples = 1000 +# n_features = 5000 +# X = rng.uniform(size=(n_samples, 
n_features)) +# y = rng.integers(0, 2, size=n_samples) # Binary classification + +# clf = FeatureImportanceForestClassifier( +# estimator=HonestForestClassifier( +# n_estimators=10, random_state=seed, n_jobs=-1, honest_fraction=0.5 +# ), +# test_size=0.2, +# permute_per_tree=False, +# sample_dataset_per_tree=False, +# ) + +# stat, pvalue = clf.test(X, y, covariate_index=[1, 2], metric="mi") From 60d9c856bf4fe65ae42f154c9fda12415696b519 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 5 Oct 2023 10:30:05 -0400 Subject: [PATCH 70/70] Release v0.2 Signed-off-by: Adam Li --- pyproject.toml | 2 +- sktree/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 544710691..2c0d0c2af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ name = "scikit-tree" # 0.0.0 is standard placeholder for poetry-dynamic-versioning # any changes to this should not be checked in # -version = "0.2.0dev0" +version = "0.2.0" description = "Modern decision trees in Python" maintainers = [ {name="Neurodata", email="adam.li@columbia.edu"} diff --git a/sktree/__init__.py b/sktree/__init__.py index c6af80ea0..e0fe9e741 100644 --- a/sktree/__init__.py +++ b/sktree/__init__.py @@ -3,7 +3,7 @@ import os import sys -__version__ = "0.2.0dev0" +__version__ = "0.2.0" logger = logging.getLogger(__name__)