From 6c1684de176c957b4459ae50e53d878c245987ed Mon Sep 17 00:00:00 2001 From: Anthony Wang Date: Mon, 30 Nov 2020 19:30:20 +0100 Subject: [PATCH 1/2] Update pandas-profiling package required version --- conda-env.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda-env.yml b/conda-env.yml index a247d98..ffce778 100644 --- a/conda-env.yml +++ b/conda-env.yml @@ -6,7 +6,7 @@ dependencies: - python=3.7.* - numpy=1.18.* - pandas=1.0.* - - pandas-profiling=2.4.* + - pandas-profiling=2.8.* - matplotlib=3.2.* - seaborn=0.10.* - scikit-learn=0.22.* @@ -15,4 +15,4 @@ dependencies: - cudatoolkit=10.* - tqdm=4.43.* - ipython - - jupyter \ No newline at end of file + - jupyter From 29a08726262a4c18dfb1411cb7d1cbfe43f499e9 Mon Sep 17 00:00:00 2001 From: Anthony Wang Date: Mon, 30 Nov 2020 19:56:19 +0100 Subject: [PATCH 2/2] Improve imputation of missing values with column medians --- notebooks/CBFV/cbfv/composition.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/CBFV/cbfv/composition.py b/notebooks/CBFV/cbfv/composition.py index 1ec971a..6396f0d 100644 --- a/notebooks/CBFV/cbfv/composition.py +++ b/notebooks/CBFV/cbfv/composition.py @@ -290,10 +290,10 @@ def generate_features(df, elem_prop='oliynyk', # get the column names cols = X.columns.values - # find the mean value of each column + # find the median value of each column median_values = X[cols].median() - # fill the missing values in each column with the columns mean value - X[cols] = X[cols].fillna(median_values.iloc[0]) + # fill the missing values in each column with the column's median value + X[cols] = X[cols].fillna(median_values) return X, y, formulae, skipped