From 7ee3f77f10863433a732b7d5efaf65c3dceb4b8a Mon Sep 17 00:00:00 2001 From: Andrew Schonfeld Date: Thu, 28 May 2020 23:55:50 -0400 Subject: [PATCH] * #136 & #86: column replacements for values --- docs/source/requirements.txt | 3 +- dtale/app.py | 3 +- dtale/column_replacements.py | 239 ++++++++++++++ dtale/static/css/main.css | 7 +- dtale/views.py | 67 ++++ setup.py | 1 + .../dtale/replacement/imputer-test.jsx | 139 +++++++++ .../dtale/replacement/spaces-test.jsx | 131 ++++++++ .../dtale/replacement/strings-test.jsx | 169 ++++++++++ .../dtale/replacement/value-test.jsx | 149 +++++++++ .../CreateReplacement-code-test.jsx | 77 +++++ static/__tests__/redux-test-utils.jsx | 5 + static/dtale/iframe/ColumnMenu.jsx | 8 + static/main.jsx | 4 + static/popups/Popup.jsx | 11 + .../popups/replacement/CreateReplacement.jsx | 295 ++++++++++++++++++ static/popups/replacement/Imputer.jsx | 117 +++++++ static/popups/replacement/Spaces.jsx | 71 +++++ static/popups/replacement/Strings.jsx | 120 +++++++ static/popups/replacement/Value.jsx | 273 ++++++++++++++++ tests/dtale/test_replacements.py | 180 +++++++++++ tests/dtale/test_views.py | 5 +- 22 files changed, 2068 insertions(+), 6 deletions(-) create mode 100644 dtale/column_replacements.py create mode 100644 static/__tests__/dtale/replacement/imputer-test.jsx create mode 100644 static/__tests__/dtale/replacement/spaces-test.jsx create mode 100644 static/__tests__/dtale/replacement/strings-test.jsx create mode 100644 static/__tests__/dtale/replacement/value-test.jsx create mode 100644 static/__tests__/popups/replacement/CreateReplacement-code-test.jsx create mode 100644 static/popups/replacement/CreateReplacement.jsx create mode 100644 static/popups/replacement/Imputer.jsx create mode 100644 static/popups/replacement/Spaces.jsx create mode 100644 static/popups/replacement/Strings.jsx create mode 100644 static/popups/replacement/Value.jsx create mode 100644 tests/dtale/test_replacements.py diff --git a/docs/source/requirements.txt 
b/docs/source/requirements.txt index 688debf73..a6579c8da 100644 --- a/docs/source/requirements.txt +++ b/docs/source/requirements.txt @@ -10,4 +10,5 @@ markdown pandas future Flask -scipy \ No newline at end of file +scipy +scikit-learn \ No newline at end of file diff --git a/dtale/app.py b/dtale/app.py index e9e196c52..7e24102ef 100644 --- a/dtale/app.py +++ b/dtale/app.py @@ -66,13 +66,14 @@ def __init__(self, *args, **kwargs): self.host = kwargs.pop('hostname', 'localhost') self.port = kwargs.pop('port', str(random.randint(0, 65535))) or str(random.randint(0, 65535)) super(DtaleFlaskTesting, self).__init__(*args, **kwargs) + self.application.config['SERVER_NAME'] = '{host}:{port}'.format(host=self.host, port=self.port) + self.application.config['SESSION_COOKIE_DOMAIN'] = 'localhost.localdomain' def get(self, *args, **kwargs): """ :param args: Optional arguments to be passed to :meth:`flask:flask.FlaskClient.get` :param kwargs: Optional keyword arguments to be passed to :meth:`flask:flask.FlaskClient.get` """ - self.application.config['SERVER_NAME'] = '{host}:{port}'.format(host=self.host, port=self.port) return super(DtaleFlaskTesting, self).get(url_scheme='http', *args, **kwargs) diff --git a/dtale/column_replacements.py b/dtale/column_replacements.py new file mode 100644 index 000000000..d79dd525f --- /dev/null +++ b/dtale/column_replacements.py @@ -0,0 +1,239 @@ +import re + +import numpy as np +import pandas as pd +from six import string_types + +import dtale.global_state as global_state +from dtale.utils import classify_type, find_dtype + + +class ColumnReplacement(object): + + def __init__(self, data_id, col, replacement_type, cfg, name=None): + self.data_id = data_id + if replacement_type == 'spaces': + self.builder = SpaceReplacement(col, cfg, name) + elif replacement_type == 'strings': + self.builder = StringReplacement(col, cfg, name) + elif replacement_type == 'value': + self.builder = ValueReplacement(col, cfg, name) + elif replacement_type == 
'imputer': # iterative, knn, simple + self.builder = ImputerReplacement(col, cfg, name) + else: + raise NotImplementedError("'{}' replacement not implemented yet!".format(replacement_type)) + + def build_replacements(self): + return self.builder.build_column(global_state.get_data(self.data_id)) + + def build_code(self): + return self.builder.build_code(global_state.get_data(self.data_id)) + + +def get_inner_replacement_value(val): + return np.nan if isinstance(val, string_types) and val.lower() == 'nan' else val + + +def get_replacement_value(cfg, prop): + value = (cfg or {}).get(prop) or 'nan' + return get_inner_replacement_value(value) + + +def get_inner_replacement_value_as_str(val, series): + if isinstance(val, string_types) and val.lower() == 'nan': + return 'np.nan' + if classify_type(find_dtype(series)) == 'S': + return "'{value}'".format(value=val) + return val + + +def get_replacement_value_as_str(cfg, prop, series): + value = (cfg or {}).get(prop) or 'nan' + return get_inner_replacement_value_as_str(value, series) + + +class SpaceReplacement(object): + + def __init__(self, col, cfg, name): + self.col = col + self.cfg = cfg + self.name = name + + def build_column(self, data): + value = get_replacement_value(self.cfg, 'value') + return data[self.col].replace(r'^\s+$', value, regex=True) + + def build_code(self, data): + value = get_replacement_value_as_str(self.cfg, 'value', data[self.col]) + return "df.loc[:, '{name}'] = df['{col}'].replace(r'^\\s+$', {value}, regex=True)".format( + name=self.name or self.col, col=self.col, value=value + ) + + +class StringReplacement(object): + + def __init__(self, col, cfg, name): + self.col = col + self.cfg = cfg + self.name = name + + def parse_cfg(self): + return (self.cfg[p] for p in ['value', 'ignoreCase', 'isChar']) + + def build_column(self, data): + value, ignore_case, is_char = self.parse_cfg() + flags = re.UNICODE + if ignore_case: + flags |= re.IGNORECASE + value = re.escape(value) + if is_char: + value = 
'[{value}]+'.format(value=value) + regex_pat = re.compile(r'^ *{value} *$'.format(value=value), flags=flags) + replace_with = get_replacement_value(self.cfg, 'replace') + return data[self.col].replace(regex_pat, replace_with, regex=True) + + def build_code(self, data): + value, ignore_case, is_char = self.parse_cfg() + flags = re.UNICODE + if ignore_case: + flags |= re.IGNORECASE + + regex_exp = "r'^ *{value} *$'.format(value=re.escape({value}))" + if is_char: + regex_exp = "r'^ *[{value}]+ *$'.format(value=re.escape({value}))" + regex_exp = regex_exp.format(value=value) + + replace_with = get_replacement_value_as_str(self.cfg, 'replace', data[self.col]) + + return ( + "import re\n\n" + "regex_pat = re.compile({regex_exp}, flags={flags})\n" + "df.loc[:, '{name}'] = df['{col}'].replace(regex_pat, {replace}, regex=True)" + ).format(name=self.name or self.col, col=self.col, regex_exp=regex_exp, flags=flags, replace=replace_with) + + +class ValueReplacement(object): + + def __init__(self, col, cfg, name): + self.col = col + self.cfg = cfg + self.name = name + + def build_column(self, data): + s = data[self.col] + replacements = {} + col_replacements = [] + for replacement in self.cfg.get('value', []): + value = get_replacement_value(replacement, 'value') + replacement_type = replacement.get('type') + if replacement_type == 'agg': + replace = getattr(s, replacement['replace'])() # min, max, mean, median + if pd.isnull(replace): + raise Exception( + 'Running the aggregation, {agg}, on {col} resulted in nan, this would result in a no-op.' 
+ ) + elif replacement_type == 'col': + col_replacements.append(lambda s2: np.where(s2 == value, data[replacement['replace']], s2)) + else: + replace = get_replacement_value(replacement, 'replace') + replacements[value] = replace + final_s = s + if len(replacements): + final_s = final_s.replace(replacements) + for col_r in col_replacements: + final_s = col_r(final_s) + return final_s + + def build_code(self, data): + replacements = [] + series = data[self.col] + col_replacements = [] + for replacement in self.cfg.get('value', []): + value = get_replacement_value_as_str(replacement, 'value', series) + replacement_type = self.cfg.get('type') + if replacement_type == 'agg': + replace = "getattr(df['{col}'], '{agg}')()".format(agg=replacement['value'], col=self.col) + elif replacement_type == 'col': + col_replacements.append("s = np.where(s == {value}, data['{col2}'], s)".format( + col2=replacement['replace'], value=value + )) + else: + replace = get_replacement_value_as_str(replacement, 'replace', series) + replacements.append('\t{value}: {replace}'.format(value=value, replace=replace)) + + code = ["s = df['{col}']".format(col=self.col)] + if len(replacements): + replacements = ',\n'.join(replacements) + replacements = '{\n' + replacements + '}' + code.append("s = s.replace({replacements})".format(replacements=replacements)) + code += col_replacements + code.append("df.loc[:, '{name}'] = s".format(name=self.name or self.col)) + return '\n'.join(code) + + +class ImputerReplacement(object): + + def __init__(self, col, cfg, name): + self.col = col + self.cfg = cfg + self.name = name + + def build_column(self, data): + imputer_type = self.cfg['type'] + if imputer_type == 'iterative': + try: + from sklearn.experimental import enable_iterative_imputer # noqa + from sklearn.impute import IterativeImputer + except ImportError: + raise Exception( + 'You must have at least scikit-learn 0.21.0 installed in order to use the Iterative Imputer!' 
+ ) + imputer = IterativeImputer() + elif imputer_type == 'knn': + try: + from sklearn.impute import KNNImputer + except ImportError: + raise Exception( + 'You must have at least scikit-learn 0.22.0 installed in order to use the Iterative Imputer!' + ) + n_neighbors = self.cfg.get('n_neighbors') or 2 + imputer = KNNImputer(n_neighbors=n_neighbors) + elif imputer_type == 'simple': + try: + from sklearn.impute import SimpleImputer + except ImportError: + raise Exception( + 'You must have at least scikit-learn 0.20.0 installed in order to use the Iterative Imputer!' + ) + imputer = SimpleImputer() + else: + raise NotImplementedError("'{}' sklearn imputer not implemented yet!".format(imputer_type)) + output = imputer.fit_transform(data[[self.col]]) + return pd.DataFrame(output, columns=[self.col], index=data.index)[self.col] + + def build_code(self, _data): + imputer_type = self.cfg['type'] + code = [] + if imputer_type == 'iterative': + code.append(( + "from sklearn.experimental import enable_iterative_imputer\n" + "from sklearn.impute import IterativeImputer\n\n" + "output = IterativeImputer().fit_transform(df[['{col}']])" + ).format(col=self.col)) + elif imputer_type == 'knn': + n_neighbors = self.cfg.get('n_neighbors') or 2 + code.append(( + "from sklearn.impute import KNNImputer\n\n" + "output = KNNImputer(n_neighbors={n_neighbors}).fit_transform(df[['{col}']])" + ).format(col=self.col, n_neighbors=n_neighbors)) + elif imputer_type == 'simple': + code.append(( + "from sklearn.impute import SimpleImputer\n\n" + "output = SimpleImputer().fit_transform(df[['{col}']])" + ).format(col=self.col)) + code.append( + "df.loc[:, '{name}'] = pd.DataFrame(output, columns=['{col}'], index=df.index)['{col}']".format( + name=self.name or self.col, col=self.col + ) + ) + return '\n'.join(code) diff --git a/dtale/static/css/main.css b/dtale/static/css/main.css index 04a0b31f1..6125c5c5e 100644 --- a/dtale/static/css/main.css +++ b/dtale/static/css/main.css @@ -4551,7 +4551,8 @@ 
button.close { } } -div.build-modal > div.modal-lg { +div.build-modal > div.modal-lg, +div.replacement-modal > div.modal-lg { min-width: 720px; } @@ -4567,7 +4568,8 @@ div.filter-modal > div.modal-lg { .modal-lg { max-width: 800px; } - div.build-modal > div.modal-lg { + div.build-modal > div.modal-lg, + div.replacement-modal > div.modal-lg { max-width: 720px; } div.reshape-modal > div.modal-lg { @@ -10603,6 +10605,7 @@ div.container-fluid.code-export > div#popup-content > div.modal-footer { @media (min-height: 330px) { div.container-fluid.build > div#popup-content > div.modal-footer, + div.container-fluid.replacement > div#popup-content > div.modal-footer, div.container-fluid.reshape > div#popup-content > div.modal-footer { position: absolute; bottom: 0; diff --git a/dtale/views.py b/dtale/views.py index 462cfbedd..d8693470f 100644 --- a/dtale/views.py +++ b/dtale/views.py @@ -21,6 +21,7 @@ from dtale.cli.clickutils import retrieve_meta_info_and_version from dtale.column_builders import ColumnBuilder from dtale.column_filters import ColumnFilter +from dtale.column_replacements import ColumnReplacement from dtale.dash_application.charts import (build_raw_chart, chart_url_params, chart_url_querystring, export_chart, export_chart_data, url_encode_func) @@ -951,6 +952,72 @@ def reshape_data(data_id): return jsonify_error(e) +@dtale.route('/build-replacement/') +def build_replacement(data_id): + """ + :class:`flask:flask.Flask` route to handle the replacement of specific values within a column in a dataframe. 
Some + of the operations the are available are: + - spaces: replace values consisting of only spaces with a specific value + - value: replace specific values with a specific value or aggregation + - strings: replace values which contain a specific character or string (case-insensitive or not) with a + specific value + - imputer: replace nan values using sklearn imputers iterative, knn or simple + + :param data_id: integer string identifier for a D-Tale process's data + :type data_id: str + :param col: string from flask.request.args['col'] of the column to perform replacements upon + :param type: string from flask.request.args['type'] of the type of replacement to perform + (spaces/fillna/strings/imputer) + :param cfg: dict from flask.request.args['cfg'] of how to calculate the replacements + :return: JSON {success: True/False} + """ + + def build_data_ranges(data, col, dtype): + data_ranges = {} + if classify_type(dtype) == 'F' and not data[col].isnull().all(): + try: + data_ranges[col] = data[[col]].agg(['min', 'max']).to_dict()[col] + except ValueError: + pass + return data_ranges + + try: + data = global_state.get_data(data_id) + name = get_str_arg(request, 'name') + if name is not None: + name = str(name) + if name in data.columns: + raise Exception("A column named '{}' already exists!".format(name)) + col = get_str_arg(request, 'col') + replacement_type = get_str_arg(request, 'type') + cfg = json.loads(get_str_arg(request, 'cfg')) + + builder = ColumnReplacement(data_id, col, replacement_type, cfg) + output = builder.build_replacements() + dtype = find_dtype(output) + curr_dtypes = global_state.get_dtypes(data_id) + + if name is not None: + data.loc[:, name] = output + dtype_f = dtype_formatter(data, {name: dtype}, build_data_ranges(data, name, dtype)) + curr_dtypes.append(dtype_f(len(curr_dtypes), name)) + else: + data.loc[:, col] = output + dtype_f = dtype_formatter(data, {name: dtype}, build_data_ranges(data, name, dtype)) + col_index = next((i for i, d in 
enumerate(curr_dtypes) if d['name'] == col), None) + curr_col_dtype = dtype_f(col_index, col) + curr_dtypes = [curr_col_dtype if d['name'] == col else d for d in curr_dtypes] + + global_state.set_data(data_id, data) + global_state.set_dtypes(data_id, curr_dtypes) + curr_history = global_state.get_history(data_id) or [] + curr_history += [builder.build_code()] + global_state.set_history(data_id, curr_history) + return jsonify(success=True) + except BaseException as e: + return jsonify_error(e) + + @dtale.route('/test-filter/') def test_filter(data_id): """ diff --git a/setup.py b/setup.py index a116e515f..10aa19bc9 100644 --- a/setup.py +++ b/setup.py @@ -70,6 +70,7 @@ def run_tests(self): "itsdangerous", "pandas", "requests", + "scikit-learn >= '0.21.0", "scipy", "six" ], diff --git a/static/__tests__/dtale/replacement/imputer-test.jsx b/static/__tests__/dtale/replacement/imputer-test.jsx new file mode 100644 index 000000000..e534bfe83 --- /dev/null +++ b/static/__tests__/dtale/replacement/imputer-test.jsx @@ -0,0 +1,139 @@ +import { mount } from "enzyme"; +import React from "react"; +import { Provider } from "react-redux"; + +import { expect, it } from "@jest/globals"; + +import mockPopsicle from "../../MockPopsicle"; +import { clickColMenuButton } from "../../iframe/iframe-utils"; +import reduxUtils from "../../redux-test-utils"; +import { buildInnerHTML, tickUpdate, withGlobalJquery } from "../../test-utils"; + +const originalOffsetHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetHeight"); +const originalOffsetWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetWidth"); +const originalInnerWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerWidth"); +const originalInnerHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerHeight"); + +describe("DataViewer tests", () => { + let result, CreateReplacement, Imputer; + + beforeAll(() => { + Object.defineProperty(HTMLElement.prototype, 
"offsetHeight", { + configurable: true, + value: 500, + }); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", { + configurable: true, + value: 500, + }); + Object.defineProperty(window, "innerWidth", { + configurable: true, + value: 1205, + }); + Object.defineProperty(window, "innerHeight", { + configurable: true, + value: 775, + }); + + const mockBuildLibs = withGlobalJquery(() => + mockPopsicle.mock(url => { + const { urlFetcher } = require("../../redux-test-utils").default; + return urlFetcher(url); + }) + ); + + const mockChartUtils = withGlobalJquery(() => (ctx, cfg) => { + const chartCfg = { ctx, cfg, data: cfg.data, destroyed: false }; + chartCfg.destroy = () => (chartCfg.destroyed = true); + chartCfg.getElementsAtXAxis = _evt => [{ _index: 0 }]; + chartCfg.getElementAtEvent = _evt => [{ _datasetIndex: 0, _index: 0, _chart: { config: cfg, data: cfg.data } }]; + return chartCfg; + }); + + jest.mock("popsicle", () => mockBuildLibs); + jest.mock("chart.js", () => mockChartUtils); + jest.mock("chartjs-plugin-zoom", () => ({})); + jest.mock("chartjs-chart-box-and-violin-plot/build/Chart.BoxPlot.js", () => ({})); + }); + + beforeEach(async () => { + const { DataViewer } = require("../../../dtale/DataViewer"); + CreateReplacement = require("../../../popups/replacement/CreateReplacement").ReactCreateReplacement; + Imputer = require("../../../popups/replacement/Imputer").Imputer; + + const store = reduxUtils.createDtaleStore(); + buildInnerHTML({ settings: "" }, store); + result = mount( + + + , + { attachTo: document.getElementById("content") } + ); + await tickUpdate(result); + // select column + result.find(".main-grid div.headerCell div").first().simulate("click"); + result.update(); + + clickColMenuButton(result, "Replacements"); + await tickUpdate(result); + }); + + afterAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", originalOffsetHeight); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", originalOffsetWidth); 
+ Object.defineProperty(window, "innerWidth", originalInnerWidth); + Object.defineProperty(window, "innerHeight", originalInnerHeight); + }); + + const findImputerInputRow = (idx = 0) => result.find(Imputer).find("div.form-group").at(idx); + + it("DataViewer: imputer iterative replacement w/ new col", async () => { + result.find(CreateReplacement).find("div.form-group").first().find("button").last().simulate("click"); + result + .find(CreateReplacement) + .find("div.form-group") + .first() + .find("input") + .first() + .simulate("change", { target: { value: "cut_col" } }); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").last().simulate("click"); + result.update(); + expect(result.find(Imputer).length).toBe(1); + findImputerInputRow().find("button").first().simulate("click"); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + }); + + it("DataViewer: imputer knn replacement", async () => { + const validationSpy = jest.spyOn(require("../../../popups/replacement/Imputer"), "validateImputerCfg"); + validationSpy.mockClear(); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").last().simulate("click"); + result.update(); + findImputerInputRow().find("button").at(1).simulate("click"); + findImputerInputRow(1) + .find("input") + .simulate("change", { target: { value: "3" } }); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(validationSpy.mock.calls[0][0]).toStrictEqual({ + type: "knn", + nNeighbors: "3", + }); + }); + + it("DataViewer: imputer simple replacement", async () => { + const validationSpy = jest.spyOn(require("../../../popups/replacement/Imputer"), "validateImputerCfg"); + validationSpy.mockClear(); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").last().simulate("click"); + result.update(); + 
findImputerInputRow().find("button").last().simulate("click"); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(validationSpy.mock.calls[0][0]).toStrictEqual({ type: "simple" }); + }); + + it("DataViewer: imputer cfg validation", () => { + const { validateImputerCfg } = require("../../../popups/replacement/Imputer"); + expect(validateImputerCfg({ type: null })).toBe("Please select an imputer!"); + }); +}); diff --git a/static/__tests__/dtale/replacement/spaces-test.jsx b/static/__tests__/dtale/replacement/spaces-test.jsx new file mode 100644 index 000000000..6ed5b1acd --- /dev/null +++ b/static/__tests__/dtale/replacement/spaces-test.jsx @@ -0,0 +1,131 @@ +import { mount } from "enzyme"; +import React from "react"; +import { Provider } from "react-redux"; + +import { expect, it } from "@jest/globals"; + +import mockPopsicle from "../../MockPopsicle"; +import { clickColMenuButton } from "../../iframe/iframe-utils"; +import reduxUtils from "../../redux-test-utils"; +import { buildInnerHTML, tickUpdate, withGlobalJquery } from "../../test-utils"; + +const originalOffsetHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetHeight"); +const originalOffsetWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetWidth"); +const originalInnerWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerWidth"); +const originalInnerHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerHeight"); + +describe("DataViewer tests", () => { + let result, CreateReplacement, Spaces; + + beforeAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", { + configurable: true, + value: 500, + }); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", { + configurable: true, + value: 500, + }); + Object.defineProperty(window, "innerWidth", { + configurable: true, + value: 1205, + }); + Object.defineProperty(window, "innerHeight", { + 
configurable: true, + value: 775, + }); + + const mockBuildLibs = withGlobalJquery(() => + mockPopsicle.mock(url => { + const { urlFetcher } = require("../../redux-test-utils").default; + return urlFetcher(url); + }) + ); + + const mockChartUtils = withGlobalJquery(() => (ctx, cfg) => { + const chartCfg = { ctx, cfg, data: cfg.data, destroyed: false }; + chartCfg.destroy = () => (chartCfg.destroyed = true); + chartCfg.getElementsAtXAxis = _evt => [{ _index: 0 }]; + chartCfg.getElementAtEvent = _evt => [{ _datasetIndex: 0, _index: 0, _chart: { config: cfg, data: cfg.data } }]; + return chartCfg; + }); + + jest.mock("popsicle", () => mockBuildLibs); + jest.mock("chart.js", () => mockChartUtils); + jest.mock("chartjs-plugin-zoom", () => ({})); + jest.mock("chartjs-chart-box-and-violin-plot/build/Chart.BoxPlot.js", () => ({})); + }); + + beforeEach(async () => { + const { DataViewer } = require("../../../dtale/DataViewer"); + CreateReplacement = require("../../../popups/replacement/CreateReplacement").ReactCreateReplacement; + Spaces = require("../../../popups/replacement/Spaces").Spaces; + + const store = reduxUtils.createDtaleStore(); + buildInnerHTML({ settings: "" }, store); + result = mount( + + + , + { attachTo: document.getElementById("content") } + ); + await tickUpdate(result); + // select column + result.find(".main-grid div.headerCell div").at(2).simulate("click"); + result.update(); + + clickColMenuButton(result, "Replacements"); + await tickUpdate(result); + }); + + afterAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", originalOffsetHeight); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", originalOffsetWidth); + Object.defineProperty(window, "innerWidth", originalInnerWidth); + Object.defineProperty(window, "innerHeight", originalInnerHeight); + }); + + it("DataViewer: spaces replacement w/ new col", async () => { + result.find(CreateReplacement).find("div.form-group").first().find("button").last().simulate("click"); 
+ result + .find(CreateReplacement) + .find("div.form-group") + .first() + .find("input") + .first() + .simulate("change", { target: { value: "cut_col" } }); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").at(1).simulate("click"); + result.update(); + expect(result.find(Spaces).length).toBe(1); + const spacesInputs = result.find(Spaces).first(); + spacesInputs + .find("div.form-group") + .first() + .find("input") + .simulate("change", { target: { value: "nan" } }); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + }); + + it("DataViewer: spaces replacement", async () => { + const validationSpy = jest.spyOn(require("../../../popups/replacement/Spaces"), "validateSpacesCfg"); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").at(1).simulate("click"); + result.update(); + expect(result.find(Spaces).length).toBe(1); + const spacesInputs = result.find(Spaces).first(); + spacesInputs + .find("div.form-group") + .first() + .find("input") + .simulate("change", { target: { value: "nan" } }); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(validationSpy.mock.calls[0][0]).toStrictEqual({ replace: "nan" }); + }); + + it("DataViewer: spaces cfg validation", () => { + const { validateSpacesCfg } = require("../../../popups/replacement/Spaces"); + const cfg = { replace: null }; + expect(validateSpacesCfg(cfg)).toBe("Please enter a replacement value!"); + }); +}); diff --git a/static/__tests__/dtale/replacement/strings-test.jsx b/static/__tests__/dtale/replacement/strings-test.jsx new file mode 100644 index 000000000..099d857a2 --- /dev/null +++ b/static/__tests__/dtale/replacement/strings-test.jsx @@ -0,0 +1,169 @@ +import { mount } from "enzyme"; +import React from "react"; +import { Provider } from "react-redux"; + +import { expect, it } from "@jest/globals"; + +import { RemovableError } 
from "../../../RemovableError"; +import mockPopsicle from "../../MockPopsicle"; +import { clickColMenuButton } from "../../iframe/iframe-utils"; +import reduxUtils from "../../redux-test-utils"; +import { buildInnerHTML, tickUpdate, withGlobalJquery } from "../../test-utils"; + +const originalOffsetHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetHeight"); +const originalOffsetWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetWidth"); +const originalInnerWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerWidth"); +const originalInnerHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerHeight"); + +describe("DataViewer tests", () => { + let result, CreateReplacement, Strings; + + beforeAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", { + configurable: true, + value: 500, + }); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", { + configurable: true, + value: 500, + }); + Object.defineProperty(window, "innerWidth", { + configurable: true, + value: 1205, + }); + Object.defineProperty(window, "innerHeight", { + configurable: true, + value: 775, + }); + + const mockBuildLibs = withGlobalJquery(() => + mockPopsicle.mock(url => { + const { urlFetcher } = require("../../redux-test-utils").default; + return urlFetcher(url); + }) + ); + + const mockChartUtils = withGlobalJquery(() => (ctx, cfg) => { + const chartCfg = { ctx, cfg, data: cfg.data, destroyed: false }; + chartCfg.destroy = () => (chartCfg.destroyed = true); + chartCfg.getElementsAtXAxis = _evt => [{ _index: 0 }]; + chartCfg.getElementAtEvent = _evt => [{ _datasetIndex: 0, _index: 0, _chart: { config: cfg, data: cfg.data } }]; + return chartCfg; + }); + + jest.mock("popsicle", () => mockBuildLibs); + jest.mock("chart.js", () => mockChartUtils); + jest.mock("chartjs-plugin-zoom", () => ({})); + jest.mock("chartjs-chart-box-and-violin-plot/build/Chart.BoxPlot.js", () => ({})); + }); + + 
beforeEach(async () => { + const { DataViewer } = require("../../../dtale/DataViewer"); + CreateReplacement = require("../../../popups/replacement/CreateReplacement").ReactCreateReplacement; + Strings = require("../../../popups/replacement/Strings").Strings; + + const store = reduxUtils.createDtaleStore(); + buildInnerHTML({ settings: "" }, store); + result = mount( + + + , + { attachTo: document.getElementById("content") } + ); + await tickUpdate(result); + // select column + result.find(".main-grid div.headerCell div").at(2).simulate("click"); + result.update(); + + clickColMenuButton(result, "Replacements"); + await tickUpdate(result); + }); + + afterAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", originalOffsetHeight); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", originalOffsetWidth); + Object.defineProperty(window, "innerWidth", originalInnerWidth); + Object.defineProperty(window, "innerHeight", originalInnerHeight); + }); + + it("DataViewer: strings replacement w/ new col", async () => { + result.find(CreateReplacement).find("div.form-group").first().find("button").last().simulate("click"); + result + .find(CreateReplacement) + .find("div.form-group") + .first() + .find("input") + .first() + .simulate("change", { target: { value: "cut_col" } }); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").last().simulate("click"); + result.update(); + expect(result.find(Strings).length).toBe(1); + const stringsInputs = result.find(Strings).first(); + stringsInputs + .find("div.form-group") + .first() + .find("input") + .simulate("change", { target: { value: "nan" } }); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + }); + + it("DataViewer: strings replacement", async () => { + const validationSpy = jest.spyOn(require("../../../popups/replacement/Strings"), "validateStringsCfg"); + 
result.find(CreateReplacement).find("div.form-group").at(1).find("button").last().simulate("click"); + result.update(); + expect(result.find(Strings).length).toBe(1); + const stringsInputs = result.find(Strings).first(); + stringsInputs + .find("div.form-group") + .first() + .find("input") + .simulate("change", { target: { value: "A" } }); + stringsInputs.find("div.form-group").at(1).find("i").simulate("click"); + stringsInputs.find("div.form-group").at(2).find("i").simulate("click"); + stringsInputs + .find("div.form-group") + .last() + .find("input") + .simulate("change", { target: { value: "nan" } }); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(validationSpy.mock.calls[0][0]).toStrictEqual({ + value: "A", + isChar: true, + ignoreCase: true, + replace: "nan", + }); + }); + + it("DataViewer: string replacement w/ new invalid col", async () => { + result.find(CreateReplacement).find("div.form-group").first().find("button").last().simulate("click"); + result + .find(CreateReplacement) + .find("div.form-group") + .first() + .find("input") + .first() + .simulate("change", { target: { value: "error" } }); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").last().simulate("click"); + result.update(); + expect(result.find(Strings).length).toBe(1); + const spacesInputs = result.find(Strings).first(); + spacesInputs + .find("div.form-group") + .first() + .find("input") + .simulate("change", { target: { value: "nan" } }); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(result.find(CreateReplacement).find(RemovableError)).toHaveLength(1); + }); + + it("DataViewer: strings cfg validation", () => { + const { validateStringsCfg } = require("../../../popups/replacement/Strings"); + let cfg = { value: null }; + expect(validateStringsCfg(cfg)).toBe("Please enter a character or substring!"); + cfg = { value: 
"A", replace: null }; + expect(validateStringsCfg(cfg)).toBe("Please enter a replacement value!"); + }); +}); diff --git a/static/__tests__/dtale/replacement/value-test.jsx b/static/__tests__/dtale/replacement/value-test.jsx new file mode 100644 index 000000000..d2b3314ad --- /dev/null +++ b/static/__tests__/dtale/replacement/value-test.jsx @@ -0,0 +1,149 @@ +import { mount } from "enzyme"; +import React from "react"; +import { Provider } from "react-redux"; +import Select from "react-select"; + +import { expect, it } from "@jest/globals"; + +import { RemovableError } from "../../../RemovableError"; +import mockPopsicle from "../../MockPopsicle"; +import { clickColMenuButton } from "../../iframe/iframe-utils"; +import reduxUtils from "../../redux-test-utils"; +import { buildInnerHTML, tickUpdate, withGlobalJquery } from "../../test-utils"; + +const originalOffsetHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetHeight"); +const originalOffsetWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "offsetWidth"); +const originalInnerWidth = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerWidth"); +const originalInnerHeight = Object.getOwnPropertyDescriptor(HTMLElement.prototype, "innerHeight"); + +describe("DataViewer tests", () => { + let result, CreateReplacement, Value; + + beforeAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", { + configurable: true, + value: 500, + }); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", { + configurable: true, + value: 500, + }); + Object.defineProperty(window, "innerWidth", { + configurable: true, + value: 1205, + }); + Object.defineProperty(window, "innerHeight", { + configurable: true, + value: 775, + }); + + const mockBuildLibs = withGlobalJquery(() => + mockPopsicle.mock(url => { + const { urlFetcher } = require("../../redux-test-utils").default; + return urlFetcher(url); + }) + ); + + const mockChartUtils = withGlobalJquery(() => (ctx, cfg) => { 
+ const chartCfg = { ctx, cfg, data: cfg.data, destroyed: false }; + chartCfg.destroy = () => (chartCfg.destroyed = true); + chartCfg.getElementsAtXAxis = _evt => [{ _index: 0 }]; + chartCfg.getElementAtEvent = _evt => [{ _datasetIndex: 0, _index: 0, _chart: { config: cfg, data: cfg.data } }]; + return chartCfg; + }); + + jest.mock("popsicle", () => mockBuildLibs); + jest.mock("chart.js", () => mockChartUtils); + jest.mock("chartjs-plugin-zoom", () => ({})); + jest.mock("chartjs-chart-box-and-violin-plot/build/Chart.BoxPlot.js", () => ({})); + }); + + beforeEach(async () => { + const { DataViewer } = require("../../../dtale/DataViewer"); + CreateReplacement = require("../../../popups/replacement/CreateReplacement").ReactCreateReplacement; + Value = require("../../../popups/replacement/Value").Value; + + const store = reduxUtils.createDtaleStore(); + buildInnerHTML({ settings: "" }, store); + result = mount( + + + , + { attachTo: document.getElementById("content") } + ); + await tickUpdate(result); + // select column + result.find(".main-grid div.headerCell div").first().simulate("click"); + result.update(); + + clickColMenuButton(result, "Replacements"); + await tickUpdate(result); + }); + + afterAll(() => { + Object.defineProperty(HTMLElement.prototype, "offsetHeight", originalOffsetHeight); + Object.defineProperty(HTMLElement.prototype, "offsetWidth", originalOffsetWidth); + Object.defineProperty(window, "innerWidth", originalInnerWidth); + Object.defineProperty(window, "innerHeight", originalInnerHeight); + }); + + const findValueInputRow = (idx = 0) => result.find(Value).find("div.form-group").at(idx); + + it("DataViewer: value raw replacement w/ new col", async () => { + result.find(CreateReplacement).find("div.form-group").first().find("button").last().simulate("click"); + result + .find(CreateReplacement) + .find("div.form-group") + .first() + .find("input") + .first() + .simulate("change", { target: { value: "cut_col" } }); + 
result.find(CreateReplacement).find("div.form-group").at(1).find("button").first().simulate("click"); + result.update(); + expect(result.find(Value).length).toBe(1); + findValueInputRow() + .find("input") + .simulate("change", { target: { value: "3" } }); + findValueInputRow(1).find("button").first().simulate("click"); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + expect(result.find(CreateReplacement).find(RemovableError)).toHaveLength(1); + findValueInputRow(1).find("i").first().simulate("click"); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + }); + + it("DataViewer: value agg replacement", async () => { + const validationSpy = jest.spyOn(require("../../../popups/replacement/Value"), "validateValueCfg"); + validationSpy.mockClear(); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").first().simulate("click"); + result.update(); + findValueInputRow(1).find("button").at(1).simulate("click"); + findValueInputRow(1).find(Select).first().instance().onChange({ value: "median" }); + findValueInputRow(1).find("i").first().simulate("click"); + result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(validationSpy.mock.calls[0][0]).toStrictEqual({ + value: [{ type: "agg", value: "nan", replace: "median" }], + }); + }); + + it("DataViewer: value col replacement", async () => { + const validationSpy = jest.spyOn(require("../../../popups/replacement/Value"), "validateValueCfg"); + validationSpy.mockClear(); + result.find(CreateReplacement).find("div.form-group").at(1).find("button").first().simulate("click"); + result.update(); + findValueInputRow(1).find("button").last().simulate("click"); + findValueInputRow(1).find(Select).first().instance().onChange({ value: "col2" }); + findValueInputRow(1).find("i").first().simulate("click"); + 
result.find("div.modal-footer").first().find("button").first().simulate("click"); + await tickUpdate(result); + expect(validationSpy.mock.calls[0][0]).toStrictEqual({ + value: [{ type: "col", value: "nan", replace: "col2" }], + }); + }); + + it("DataViewer: value cfg validation", () => { + const { validateValueCfg } = require("../../../popups/replacement/Value"); + expect(validateValueCfg([])).toBe("Please add (+) a replacement!"); + }); +}); diff --git a/static/__tests__/popups/replacement/CreateReplacement-code-test.jsx b/static/__tests__/popups/replacement/CreateReplacement-code-test.jsx new file mode 100644 index 000000000..9e2331b93 --- /dev/null +++ b/static/__tests__/popups/replacement/CreateReplacement-code-test.jsx @@ -0,0 +1,77 @@ +import _ from "lodash"; + +import { expect, it } from "@jest/globals"; + +import { buildCode as buildImputerCode } from "../../../popups/replacement/Imputer"; +import { buildCode as buildSpacesCode } from "../../../popups/replacement/Spaces"; +import { buildCode as buildStringsCode } from "../../../popups/replacement/Strings"; +import { buildCode as buildValueCode } from "../../../popups/replacement/Value"; + +describe("CreateReplacement buildCode tests", () => { + it("Spaces buildCode test", () => { + let code = buildSpacesCode("foo", { replace: "a" }); + expect(code).toBe("df.loc[:, 'foo'] = df['foo'].replace(r'^\\\\s+$', 'a', regex=True)"); + + code = buildSpacesCode("foo", { replace: "nan" }); + expect(code).toBe("df.loc[:, 'foo'] = df['foo'].replace(r'^\\\\s+$', np.nan, regex=True)"); + + code = buildSpacesCode("foo", { replace: null }); + expect(code).toBeNull(); + }); + + it("Value buildCode test", () => { + let code = buildValueCode("foo", "string", [{ type: "raw", value: "nan", replace: "foo" }]); + expect(code).toStrictEqual(["s = df['foo']", "s = s.replace({", "\tnp.nan: 'foo',", "})"]); + code = buildValueCode("foo", "string", []); + expect(code).toBeNull(); + code = buildValueCode("foo", "string", [{ type: "raw", 
value: "a", replace: "foo" }]); + expect(code).toStrictEqual(["s = df['foo']", "s = s.replace({", "\t'a': 'foo',", "})"]); + code = buildValueCode("foo", "float", [{ type: "raw", value: 1.5, replace: 1.0 }]); + expect(code).toStrictEqual(["s = df['foo']", "s = s.replace({", "\t1.5: 1,", "})"]); + code = buildValueCode("foo", "float", [{ type: "col", value: 1.5, replace: "a" }]); + expect(code).toStrictEqual(["s = df['foo']", "s = np.where(s == 1.5, data['a'], s)"]); + code = buildValueCode("foo", "float", [{ type: "agg", value: 1.5, replace: "median" }]); + expect(code).toStrictEqual(["s = df['foo']", "s = s.replace({", "\t1.5: getattr(df['foo'], 'median')(),", "})"]); + }); + + it("Strings buildCode test", () => { + let cfg = { value: null, isChar: false, ignoreCase: false, replace: "nan" }; + let code = buildStringsCode("foo", "string", cfg); + expect(code).toBeNull(); + cfg.value = "foo"; + code = buildStringsCode("foo", "string", cfg); + expect(_.takeRight(code, 2)).toStrictEqual([ + `regex_pat = re.compile(r'^ *' + re.escape('foo') + ' *$', flags=re.UNICODE)`, + `df.loc[:, 'foo'] = df['foo'].replace(regex_pat, np.nan, regex=True)`, + ]); + cfg = { value: "f", isChar: true, ignoreCase: true, replace: "bizz" }; + code = buildStringsCode("foo", "string", cfg); + expect(_.takeRight(code, 2)).toStrictEqual([ + `regex_pat = re.compile(r'^ *[' + re.escape('f') + ']+ *$', flags=re.UNICODE | re.IGNORECASE)`, + `df.loc[:, 'foo'] = df['foo'].replace(regex_pat, 'bizz', regex=True)`, + ]); + }); + + it("Imputer buildCode test", () => { + let code = buildImputerCode("foo", { type: "iterative" }); + expect(code).toStrictEqual([ + "from sklearn.experimental import enable_iterative_imputer", + "from sklearn.impute import IterativeImputer", + "", + "output = IterativeImputer().fit_transform(df[['foo']])", + "df.loc[:, 'foo'] = pd.DataFrame(output, columns=['foo'], index=df.index)['foo']", + ]); + code = buildImputerCode("foo", { type: "knn", nNeighbors: 3 }); + 
expect(_.take(code, 3)).toStrictEqual([ + "from sklearn.impute import KNNImputer", + "", + "output = KNNImputer(n_neighbors=3).fit_transform(df[['foo']])", + ]); + code = buildImputerCode("foo", { type: "simple" }); + expect(_.take(code, 3)).toStrictEqual([ + "from sklearn.impute import SimpleImputer", + "", + "output = SimpleImputer().fit_transform(df[['foo']])", + ]); + }); +}); diff --git a/static/__tests__/redux-test-utils.jsx b/static/__tests__/redux-test-utils.jsx index 7787e71bd..bc289ff44 100644 --- a/static/__tests__/redux-test-utils.jsx +++ b/static/__tests__/redux-test-utils.jsx @@ -223,6 +223,11 @@ function urlFetcher(url) { return { error: "error test" }; } return { success: true, url: "http://localhost:40000/dtale/main/1" }; + } else if (_.startsWith(url, "/dtale/build-replacement")) { + if (urlParams.name === "error") { + return { error: "error test" }; + } + return { success: true }; } else if (_.startsWith(url, "/dtale/reshape")) { if (urlParams.index === "error") { return { error: "error test" }; diff --git a/static/dtale/iframe/ColumnMenu.jsx b/static/dtale/iframe/ColumnMenu.jsx index e63f17e35..c80f43e36 100644 --- a/static/dtale/iframe/ColumnMenu.jsx +++ b/static/dtale/iframe/ColumnMenu.jsx @@ -238,6 +238,14 @@ class ReactColumnMenu extends React.Component { +
  • + + + +
  • + ); + })} + +
    + {this.state.saveAs === "new" && ( + this.setState({ name: e.target.value })} + /> + )} +
    + + + +
    + +
    +
    + {_.map(types, ([type, label, _filterer], i) => { + const buttonProps = { className: "btn" }; + if (type === this.state.type) { + buttonProps.className += " btn-primary active"; + } else { + buttonProps.className += " btn-primary inactive"; + buttonProps.onClick = () => this.setState({ type }); + } + return ( + + ); + })} +
    + {this.state.type && {TYPE_DESC[this.state.type]}} +
    +
    + {body} + + ); + } + + renderCode() { + if (_.get(this.state, ["code", this.state.type])) { + const code = _.concat(_.get(this.state, ["code", this.state.type], []), []); + let markup; + if (_.size(code) > 2) { + markup = ( +
    +
    {code[0]}
    +
    {code[1]}
    +
    {"hover to see more..."}
    +
    +
    {_.join(code, "\n")}
    +
    +
    + ); + } else { + markup = ( +
    + {_.map(code, (c, i) => ( +
    {c}
    + ))} +
    + ); + } + return ( +
    + Code: + {markup} +
    + ); + } + return null; + } + + render() { + let error = null; + if (this.state.error) { + error = ( +
    +
    {this.state.error}
    +
    + ); + } + return [ + error, + + {this.renderBody()} + , +
    + {this.renderCode()} + +
    , + ]; + } +} +ReactCreateReplacement.displayName = "CreateColumn"; +ReactCreateReplacement.propTypes = { + dataId: PropTypes.string.isRequired, + chartData: PropTypes.shape({ + propagateState: PropTypes.func, + selectedCol: PropTypes.string, + }), + onClose: PropTypes.func, +}; + +const ReduxCreateReplacement = connect( + ({ dataId, chartData }) => ({ dataId, chartData }), + dispatch => ({ onClose: chartData => dispatch(closeChart(chartData || {})) }) +)(ReactCreateReplacement); +export { ReactCreateReplacement, ReduxCreateReplacement as CreateReplacement }; diff --git a/static/popups/replacement/Imputer.jsx b/static/popups/replacement/Imputer.jsx new file mode 100644 index 000000000..262b00756 --- /dev/null +++ b/static/popups/replacement/Imputer.jsx @@ -0,0 +1,117 @@ +import _ from "lodash"; +import PropTypes from "prop-types"; +import React from "react"; + +function validateImputerCfg({ type }) { + if (_.isNull(type)) { + return "Please select an imputer!"; + } + return null; +} + +function buildCode(col, cfg) { + const { type, nNeighbors } = cfg; + const code = []; + if (type == "iterative") { + code.push("from sklearn.experimental import enable_iterative_imputer"); + code.push("from sklearn.impute import IterativeImputer"); + code.push(""); + code.push(`output = IterativeImputer().fit_transform(df[['${col}']])`); + } else if (type === "knn") { + code.push("from sklearn.impute import KNNImputer"); + code.push(""); + code.push(`output = KNNImputer(n_neighbors=${nNeighbors ?? 
2}).fit_transform(df[['${col}']])`); + } else if (type === "simple") { + code.push("from sklearn.impute import SimpleImputer"); + code.push(""); + code.push(`output = SimpleImputer().fit_transform(df[['${col}']])`); + } + code.push(`df.loc[:, '${col}'] = pd.DataFrame(output, columns=['${col}'], index=df.index)['${col}']`); + return code; +} + +class Imputer extends React.Component { + constructor(props) { + super(props); + this.state = { type: null, nNeighbors: null }; + this.updateState = this.updateState.bind(this); + this.renderImputerInputs = this.renderImputerInputs.bind(this); + } + + updateState(state) { + const currState = _.assignIn(this.state, state); + const props = ["type"]; + if (currState.type === "knn") { + props.push("nNeighbors"); + } + let cfg = _.pick(currState, props); + cfg = _.pickBy(cfg, _.identity); + this.setState(currState, () => + this.props.updateState({ + cfg, + code: buildCode(this.props.col, currState), + }) + ); + } + + renderImputerInputs() { + if (this.state.type !== "knn") { + return null; + } + return ( +
    + +
    + this.updateState({ nNeighbors: e.target.value })} + /> + Default: 2 +
    +
    + ); + } + + render() { + return [ +
    + +
    +
    + {_.map( + [ + ["iterative", "Iterative"], + ["knn", "KNN"], + ["simple", "Simple"], + ], + ([type, label]) => { + const buttonProps = { className: "btn btn-primary" }; + if (type === this.state.type) { + buttonProps.className += " active"; + } else { + buttonProps.className += " inactive"; + buttonProps.onClick = () => this.updateState({ type }); + } + return ( + + ); + } + )} +
    +
    +
    , + this.renderImputerInputs(), + ]; + } +} +Imputer.displayName = "Imputer"; +Imputer.propTypes = { + updateState: PropTypes.func, + col: PropTypes.string, +}; + +export { Imputer, validateImputerCfg, buildCode }; diff --git a/static/popups/replacement/Spaces.jsx b/static/popups/replacement/Spaces.jsx new file mode 100644 index 000000000..5ca4efae7 --- /dev/null +++ b/static/popups/replacement/Spaces.jsx @@ -0,0 +1,71 @@ +import _ from "lodash"; +import PropTypes from "prop-types"; +import React from "react"; + +function validateSpacesCfg({ replace }) { + if (_.isNull(replace) || "") { + return "Please enter a replacement value!"; + } + return null; +} + +function buildCode(col, { replace }) { + if (_.isNull(replace) || "") { + return null; + } + let replaceVal = replace; + if (_.toLower(replaceVal) === "nan") { + replaceVal = "np.nan"; + } else { + replaceVal = `'${replaceVal}'`; + } + return `df.loc[:, '${col}'] = df['${col}'].replace(r'^\\\\s+$', ${replaceVal}, regex=True)`; +} + +class Spaces extends React.Component { + constructor(props) { + super(props); + this.state = { replace: "nan" }; + this.updateState = this.updateState.bind(this); + } + + componentDidMount() { + // this is because when we initialize "Spaces" we already have enough state for a cfg + this.updateState({}); + } + + updateState(state) { + const currState = _.assignIn(this.state, state); + const cfg = _.pick(currState, ["replace"]); + this.setState(currState, () => + this.props.updateState({ + cfg, + code: buildCode(this.props.col, currState), + }) + ); + } + + render() { + return ( +
    + +
    + this.updateState({ replace: e.target.value })} + /> + {`To replace with missings, please enter the string "nan"`} +
    +
    + ); + } +} +Spaces.displayName = "Spaces"; +Spaces.propTypes = { + updateState: PropTypes.func, + col: PropTypes.string, +}; + +export { Spaces, validateSpacesCfg, buildCode }; diff --git a/static/popups/replacement/Strings.jsx b/static/popups/replacement/Strings.jsx new file mode 100644 index 000000000..7f9bcf574 --- /dev/null +++ b/static/popups/replacement/Strings.jsx @@ -0,0 +1,120 @@ +import _ from "lodash"; +import PropTypes from "prop-types"; +import React from "react"; + +function validateStringsCfg(cfg) { + const { value, replace } = cfg; + if (_.isNull(value)) { + return "Please enter a character or substring!"; + } + if (_.isNull(replace) || "") { + return "Please enter a replacement value!"; + } + return null; +} + +function buildCode(col, colType, { value, isChar, ignoreCase, replace }) { + if (_.isNull(value)) { + return null; + } + let flags = "re.UNICODE"; + if (ignoreCase) { + flags += " | re.IGNORECASE"; + } + let valStr = `' + re.escape('${value}') + '`; + if (isChar) { + valStr = `[${valStr}]+`; + } + + let replaceVal = replace; + if (_.toLower(replaceVal) === "nan") { + replaceVal = "np.nan"; + } else if (colType === "string") { + replaceVal = `'${replaceVal}'`; + } + + return [ + "import re", + "", + `regex_pat = re.compile(r'^ *${valStr} *$', flags=${flags})`, + `df.loc[:, '${col}'] = df['${col}'].replace(regex_pat, ${replaceVal}, regex=True)`, + ]; +} + +class Strings extends React.Component { + constructor(props) { + super(props); + this.state = { + value: null, + isChar: false, + ignoreCase: false, + replace: null, + }; + this.updateState = this.updateState.bind(this); + } + + updateState(state) { + const currState = _.assignIn(this.state, state); + const cfg = _.pick(currState, ["value", "isChar", "ignoreCase", "replace"]); + this.setState(currState, () => + this.props.updateState({ + cfg, + code: buildCode(this.props.col, this.props.colType, currState), + }) + ); + } + + render() { + return [ +
    + +
    + this.updateState({ value: e.target.value })} + /> +
    +
    , +
    + +
    + this.updateState({ isChar: !this.state.isChar })} + /> +
    +
    , +
    + +
    + this.updateState({ ignoreCase: !this.state.ignoreCase })} + /> +
    +
    , +
    + +
    + this.updateState({ replace: e.target.value })} + /> + {`To replace with missings, please enter the string "nan"`} +
    +
    , + ]; + } +} +Strings.displayName = "Strings"; +Strings.propTypes = { + updateState: PropTypes.func, + col: PropTypes.string, + colType: PropTypes.string, +}; + +export { Strings, validateStringsCfg, buildCode }; diff --git a/static/popups/replacement/Value.jsx b/static/popups/replacement/Value.jsx new file mode 100644 index 000000000..4de72ef89 --- /dev/null +++ b/static/popups/replacement/Value.jsx @@ -0,0 +1,273 @@ +import _ from "lodash"; +import PropTypes from "prop-types"; +import React from "react"; +import Select, { createFilter } from "react-select"; + +import { RemovableError } from "../../RemovableError"; +import { exports as gu } from "../../dtale/gridUtils"; +import { AGGREGATION_OPTS } from "../charts/Aggregations"; + +function validateValueCfg(cfgs) { + if (!_.size(cfgs)) { + return "Please add (+) a replacement!"; + } + return null; +} + +function validateCfg({ type, value, replace }, cfgs) { + if (_.isNull(value) || "") { + return "Please select a value to search for!"; + } + if (_.isNull(type)) { + return "Please select a type of replacement!"; + } + if (_.isNull(replace) || "") { + if (type === "raw") { + return "Please enter a raw value!"; + } else if (type === "col") { + return "Please select a column!"; + } + return "Please select an aggregation!"; + } + if (type === "raw" && _.find(cfgs, { type: "raw", value })) { + return `A replacement for ${value} already exists, please remove it before adding this one!`; + } + return null; +} + +function valConverter(val, colType, quote = "'") { + if (_.toLower(val) === "nan") { + return "np.nan"; + } else if (colType === "string") { + return `${quote}${val}${quote}`; + } + return val; +} + +function buildCode(col, colType, cfg) { + let code = [`s = df['${col}']`]; + const replacements = []; + const colReplacements = []; + _.forEach(cfg, ({ type, value, replace }) => { + if (_.isNull(value)) { + return; + } + const valStr = valConverter(value, colType); + if (type === "agg") { + 
replacements.push(`\t${valStr}: getattr(df['${col}'], '${replace}')(),`); + } else if (type === "raw") { + replacements.push(`\t${valStr}: ${valConverter(replace, colType)},`); + } else { + colReplacements.push(`s = np.where(s == ${valStr}, data['${replace}'], s)`); + } + }); + if (_.size(replacements)) { + code.push("s = s.replace({"); + code = _.concat(code, replacements); + code.push("})"); + } + if (_.size(colReplacements)) { + code = _.concat(code, colReplacements); + } + if (_.size(code) === 1) { + return null; + } + return code; +} + +const BASE_STATE = { + value: "nan", + col: null, + raw: null, + agg: null, + type: "raw", +}; + +class Value extends React.Component { + constructor(props) { + super(props); + this.state = { ...BASE_STATE, cfgs: [] }; + this.addCfg = this.addCfg.bind(this); + this.removeCfg = this.removeCfg.bind(this); + this.renderCfg = this.renderCfg.bind(this); + } + + addCfg() { + const { type, value } = this.state; + let replace; + let finalVal = value; + if (finalVal !== "nan" && this.props.colType === "float") { + finalVal = parseFloat(finalVal); + } else if (finalVal !== "nan" && this.props.colType === "int") { + finalVal = parseInt(finalVal); + } + if (type === "col") { + replace = _.get(this.state, "col.value") || null; + } else if (type === "raw") { + replace = this.state.raw; + if (replace !== "nan" && this.props.colType === "float") { + replace = parseFloat(replace); + } else if (replace !== "nan" && this.props.colType === "int") { + replace = parseInt(replace); + } + } else { + replace = _.get(this.state, "agg.value") || null; + } + const newCfg = { type, value: finalVal, replace }; + const error = validateCfg(newCfg, this.state.cfgs); + if (error) { + this.props.updateState({ error: }); + return; + } + const currCfgs = [...this.state.cfgs, newCfg]; + const code = buildCode(this.props.col, this.props.colType, currCfgs); + const cfg = { value: currCfgs }; + this.setState({ ...BASE_STATE, cfgs: currCfgs }, () => 
this.props.updateState({ error: null, cfg, code })); + } + + removeCfg(idx) { + this.setState({ cfgs: _.filter(this.state.cfgs, (_cfg, i) => i !== idx) }); + } + + renderCfg() { + return _.map(this.state.cfgs, ({ type, value, replace }, i) => { + let replaceStr; + if (type === "raw") { + replaceStr = valConverter(replace, this.props.colType, `"`); + } else if (type === "col") { + replaceStr = `values from column "${replace}"`; + } else { + replaceStr = `the ${replace} of column "${this.props.col}"`; + } + return ( +
    +
    +
    + this.removeCfg(i)} /> + + {"Search for "} + {valConverter(value, this.props.colType, `"`)} + {" and replace it with "} + {replaceStr} + +
    +
    + ); + }); + } + + render() { + const { col, colType } = this.props; + let filterTypes = [colType]; + if (_.includes(["int", "float"], colType)) { + filterTypes = ["int", "float"]; + } + const { type } = this.state; + let input = null; + const addBtn = ; + if (type === "col") { + const columns = _.map( + _.filter(this.props.columns || [], c => _.includes(filterTypes, gu.findColType(c.dtype))), + ({ name }) => ({ value: name }) + ); + const finalOptions = _.reject(columns, { value: col }); + input = ( +
    + this.setState({ raw: e.target.value })} + /> + {addBtn} +
    , + {`To replace with missings, please enter the string "nan"`}, + ]; + } else { + input = ( +
    + this.setState({ value: e.target.value })} + /> + {`To replace missings, please enter the string "nan"`} +
    +
    , +
    + +
    +
    +
    + {_.map(["raw", "agg", "col"], t => { + const buttonProps = { className: "btn" }; + if (t === type) { + buttonProps.className += " btn-primary active"; + } else { + buttonProps.className += " btn-primary inactive"; + buttonProps.onClick = () => this.setState({ type: t }); + } + return ( + + ); + })} +
    +
    {input}
    +
    +
    +
    # tests/dtale/test_replacements.py
"""Unit tests for ``dtale.column_replacements.ColumnReplacement`` and the
``/dtale/build-replacement`` endpoint."""
import json

import mock
import numpy as np
import pandas as pd
import pytest
import sklearn as skl
from pkg_resources import parse_version
from six import PY3

from dtale.app import build_app
from dtale.column_replacements import ColumnReplacement

if PY3:
    from contextlib import ExitStack
else:
    from contextlib2 import ExitStack

URL = 'http://localhost:40000'
app = build_app(url=URL)


def replacements_data():
    """Build the small frame shared by every test in this module.

    Columns:
        a -- strings, one of which is a mixed-case 'UNknown'
        b -- empty string, a single space and a dash padded with spaces
        c -- integers mixed with an empty string
        d -- floats with one NaN
        e -- strings with one NaN
    """
    return pd.DataFrame.from_dict({
        'a': ['a', 'UNknown', 'b'],
        'b': ['', ' ', ' - '],
        'c': [1, '', 3],
        'd': [1.1, np.nan, 3],
        'e': ['a', np.nan, 'b'],
    })


def verify_builder(builder, checker):
    """Run ``checker`` on the replaced column and make sure a code snippet is produced.

    :param builder: object exposing ``build_replacements()`` and ``build_code()``
    :param checker: callable receiving the output of ``build_replacements()``;
                    expected to raise on mismatch
    """
    checker(builder.build_replacements())
    assert builder.build_code()


def _patch_data(data_id, df):
    """Return a patcher that swaps ``dtale.global_state.DATA`` for ``{data_id: df}``."""
    return mock.patch('dtale.global_state.DATA', {data_id: df})


def _assert_approx(expected):
    """Return a checker comparing a column's values to ``expected`` within float tolerance.

    Computed values (medians, imputer output) should not be compared with exact
    float equality, so ``pytest.approx`` is used instead.
    """
    def checker(col):
        assert list(col.values) == pytest.approx(expected)
    return checker


def _verify_imputer(cfg):
    """Fill the NaN in column 'd' with the imputer described by ``cfg`` and check the result."""
    data_id = '1'
    with _patch_data(data_id, replacements_data()):
        builder = ColumnReplacement(data_id, 'd', 'imputer', cfg)
        verify_builder(builder, _assert_approx([1.1, 2.05, 3]))


@pytest.mark.unit
def test_spaces(unittest):
    """The single-space entry of column 'b' becomes NaN, or ``cfg['value']`` when supplied."""
    data_id, replacement_type = '1', 'spaces'
    cases = [
        ({}, ['', np.nan, ' - ']),
        ({'value': 'blah'}, ['', 'blah', ' - ']),
    ]
    with _patch_data(data_id, replacements_data()):
        for cfg, expected in cases:
            builder = ColumnReplacement(data_id, 'b', replacement_type, cfg)
            verify_builder(
                builder,
                lambda col, expected=expected: unittest.assertEqual(list(col.values), expected)
            )


@pytest.mark.unit
def test_string(unittest):
    """Exercise the 'strings' replacement for ``ignoreCase``, ``isChar`` and ``replace``."""
    data_id, replacement_type = '1', 'strings'
    # (column, cfg, expected values after replacement)
    cases = [
        ('a', {'value': 'unknown', 'ignoreCase': True, 'isChar': False}, ['a', np.nan, 'b']),
        # case-sensitive search must not match 'UNknown'
        ('a', {'value': 'unknown', 'ignoreCase': False, 'isChar': False}, ['a', 'UNknown', 'b']),
        (
            'a',
            {'value': 'unknown', 'ignoreCase': True, 'isChar': False, 'replace': 'missing'},
            ['a', 'missing', 'b'],
        ),
        ('b', {'value': '-', 'ignoreCase': True, 'isChar': True}, ['', ' ', np.nan]),
        (
            'b',
            {'value': '-', 'ignoreCase': True, 'isChar': True, 'replace': 'missing'},
            ['', ' ', 'missing'],
        ),
    ]
    with _patch_data(data_id, replacements_data()):
        for col_name, cfg, expected in cases:
            builder = ColumnReplacement(data_id, col_name, replacement_type, cfg)
            verify_builder(
                builder,
                lambda col, expected=expected: unittest.assertEqual(list(col.values), expected)
            )


@pytest.mark.unit
def test_value(unittest):
    """Exercise the 'value' replacement with raw replacements and an aggregate (median)."""
    data_id, replacement_type = '1', 'value'
    nan_to_text = dict(value='nan', type='raw', replace='for test')
    with _patch_data(data_id, replacements_data()):
        cfg = {'value': [nan_to_text]}
        builder = ColumnReplacement(data_id, 'e', replacement_type, cfg)
        verify_builder(builder, lambda col: unittest.assertEqual(list(col.values), ['a', 'for test', 'b']))

        cfg = {'value': [nan_to_text, dict(value='a', type='raw', replace='d')]}
        builder = ColumnReplacement(data_id, 'e', replacement_type, cfg)
        verify_builder(builder, lambda col: unittest.assertEqual(list(col.values), ['d', 'for test', 'b']))

        # the median of 1.1 and 3 is a computed float -> compare with tolerance
        cfg = {'value': [dict(value='nan', type='agg', replace='median')]}
        builder = ColumnReplacement(data_id, 'd', replacement_type, cfg)
        verify_builder(builder, _assert_approx([1.1, 2.05, 3]))


@pytest.mark.skipif(parse_version(skl.__version__) < parse_version('0.21.0'), reason="requires scikit-learn 0.21.0")
def test_iterative_imputers(unittest):
    """The 'iterative' imputer fills the NaN in column 'd'."""
    _verify_imputer({'type': 'iterative'})


@pytest.mark.skipif(parse_version(skl.__version__) < parse_version('0.22.0'), reason="requires scikit-learn 0.22.0")
def test_knn_imputers(unittest):
    """The 'knn' imputer (3 neighbors) fills the NaN in column 'd'."""
    _verify_imputer({'type': 'knn', 'n_neighbors': 3})


@pytest.mark.skipif(parse_version(skl.__version__) < parse_version('0.20.0'), reason="requires scikit-learn 0.20.0")
def test_simple_imputers(unittest):
    """The 'simple' imputer fills the NaN in column 'd'."""
    _verify_imputer({'type': 'simple'})


@pytest.mark.unit
def test_view(unittest):
    """Drive ``/dtale/build-replacement`` through its error paths, then through
    creating a new column and replacing a column in place."""
    from dtale.views import build_dtypes_state

    df = replacements_data()
    with app.test_client() as c:
        data = {c.port: df}
        dtypes = {c.port: build_dtypes_state(df)}
        endpoint = '/dtale/build-replacement/{}'.format(c.port)
        value_cfg = json.dumps({'value': [dict(value='nan', type='raw', replace='for test')]})
        with ExitStack() as stack:
            stack.enter_context(mock.patch('dtale.global_state.DATA', data))
            stack.enter_context(mock.patch('dtale.global_state.DTYPES', dtypes))

            # unknown replacement type
            resp = c.get(endpoint, query_string=dict(type='not_implemented', name='test', cfg=json.dumps({})))
            assert resp.json['error'] == "'not_implemented' replacement not implemented yet!"

            # target name collides with an existing column
            params = dict(type='value', col='e', name='a', cfg=value_cfg)
            resp = c.get(endpoint, query_string=params)
            assert resp.json['error'] == "A column named 'a' already exists!"

            # replacement written to a brand-new column 'e2'
            params = dict(type='value', col='e', name='e2', cfg=value_cfg)
            c.get(endpoint, query_string=params)
            unittest.assertEqual(list(data[c.port]['e2'].values), ['a', 'for test', 'b'])
            new_col = dtypes[c.port][-1]
            assert new_col['name'] == 'e2'
            # Compute the expected dtype first: ``assert x == 'string' if PY3 else 'mixed'``
            # parses as ``(x == 'string') if PY3 else 'mixed'`` and never fails on Python 2.
            expected_dtype = 'string' if PY3 else 'mixed'
            assert new_col['dtype'] == expected_dtype
            assert not new_col['hasMissing']

            # without a name the replacement is applied to column 'e' itself
            del params['name']
            c.get(endpoint, query_string=params)
            unittest.assertEqual(list(data[c.port]['e'].values), ['a', 'for test', 'b'])
            e_dtype = next((d for d in dtypes[c.port] if d['name'] == 'e'))
            assert not e_dtype['hasMissing']
stack.enter_context(mock.patch('dtale.global_state.DTYPES', {c.port: views.build_dtypes_state(df)})) @@ -1842,7 +1843,7 @@ def test_jinja_output(): response = c.get('/charts/{}'.format(c.port)) assert 'span id="forkongithub"' not in str(response.data) - with build_app(url=URL, github_fork=True).test_client() as c: + with build_app(url=url, github_fork=True).test_client() as c: with ExitStack() as stack: stack.enter_context(mock.patch('dtale.global_state.DATA', {c.port: df})) stack.enter_context(mock.patch('dtale.global_state.DTYPES', {c.port: views.build_dtypes_state(df)}))