diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3e0e8e2a..b8646814 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -5,7 +5,7 @@ defaults: &defaults
     CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
     CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
     CODECOV_TOKEN: b0d35139-0a75-427a-907b-2c78a762f8f0
-    VERSION: 1.7.9
+    VERSION: 1.7.10
     PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
   steps:
     - checkout
diff --git a/CHANGES.md b/CHANGES.md
index 3aa15c7f..5fc323e0 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,12 @@
 ## Changelog

+### 1.7.10 (2020-2-27)
+* [#75](https://github.com/man-group/dtale/issues/75), added code snippet functionality to the following:
+  * main grid, histogram, correlations, column building & charts
+* exposed CLI loaders through the following functions: `dtale.show_csv`, `dtale.show_json` & `dtale.show_arctic`
+  * built in such a way that custom loaders can easily be exposed as well
+* [#82](https://github.com/man-group/dtale/issues/82), pinned `future` package to be >= 0.14.0
+
 ### 1.7.9 (2020-2-24)
 * support for google colab
 * bugfixes: [#71](https://github.com/man-group/dtale/issues/71), [#72](https://github.com/man-group/dtale/issues/72), [#73](https://github.com/man-group/dtale/issues/73)
diff --git a/docker/2_7/Dockerfile b/docker/2_7/Dockerfile
index 4764e5dd..ce6dfccf 100644
--- a/docker/2_7/Dockerfile
+++ b/docker/2_7/Dockerfile
@@ -44,4 +44,4 @@ WORKDIR /app

 RUN set -eux \
     ; . /root/.bashrc \
-    ; easy_install dtale-1.7.9-py2.7.egg
+    ; easy_install dtale-1.7.10-py2.7.egg
diff --git a/docker/3_6/Dockerfile b/docker/3_6/Dockerfile
index 8294cf7f..a411a131 100644
--- a/docker/3_6/Dockerfile
+++ b/docker/3_6/Dockerfile
@@ -44,4 +44,4 @@ WORKDIR /app

 RUN set -eux \
     ; . /root/.bashrc \
-    ; easy_install dtale-1.7.9-py3.7.egg
+    ; easy_install dtale-1.7.10-py3.7.egg
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3d84c025..4f48bf1c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -64,9 +64,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'1.7.9'
+version = u'1.7.10'
 # The full version, including alpha/beta/rc tags.
-release = u'1.7.9'
+release = u'1.7.10'

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
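For reference, a minimal sketch of how the newly exposed loader functions might be called from Python (the file paths and column names below are hypothetical; per the `show_loader` implementations later in this diff, any keyword the loader does not consume is simply forwarded to `dtale.show`):

```python
import dtale

# CSV: 'path' and 'parse_dates' are loader properties understood by
# dtale/cli/loaders/csv_loader.py; other kwargs pass through to dtale.show
d = dtale.show_csv(path='/tmp/trades.csv', parse_dates=['date'])

# JSON: loader_func also accepts URLs, fetched via requests
d2 = dtale.show_json(path='https://example.com/data.json')
```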
diff --git a/dtale/__init__.py b/dtale/__init__.py
index cf42b7fd..52b5e5b7 100644
--- a/dtale/__init__.py
+++ b/dtale/__init__.py
@@ -4,3 +4,8 @@
 # flake8: NOQA
 from dtale.app import show, get_instance, instances  # isort:skip
+from dtale.cli.loaders import LOADERS  # isort:skip
+
+for loader_name, loader in LOADERS.items():
+    if hasattr(loader, 'show_loader'):
+        globals()['show_{}'.format(loader_name)] = loader.show_loader
diff --git a/dtale/app.py b/dtale/app.py
index 77591ed8..248897ac 100644
--- a/dtale/app.py
+++ b/dtale/app.py
@@ -37,7 +37,7 @@
 ACTIVE_HOST = None
 ACTIVE_PORT = None

-SHORT_LIFE_PATHS = ['dist']
+SHORT_LIFE_PATHS = ['dist', 'dash']
 SHORT_LIFE_TIMEOUT = 60

 REAPER_TIMEOUT = 60.0 * 60.0  # one-hour
diff --git a/dtale/charts/utils.py b/dtale/charts/utils.py
index 6db11c47..d9e29f83 100644
--- a/dtale/charts/utils.py
+++ b/dtale/charts/utils.py
@@ -75,14 +75,19 @@ def _handler(col_def):
         if len(col_def_segs) > 1 and classify_type(dtypes[col_def_segs[0]]) == 'D':
             col, freq = col_def_segs
             if freq == 'WD':
+                code = "df.set_index('{col}').index.dayofweek.values"
                 freq_grp = df.set_index(col).index.dayofweek.values
             elif freq == 'H2':
+                code = "df.set_index('{col}').index.hour.values"
                 freq_grp = df.set_index(col).index.hour.values
             else:
+                code = "df.set_index('{col}').index.to_period('{freq}').to_timestamp(how='end').values"
                 freq_grp = df.set_index(col).index.to_period(freq).to_timestamp(how='end').values
+            code = "\tpd.Series(" + code + ", index=df.index, name='{col_def}'),"
             freq_grp = pd.Series(freq_grp, index=orig_idx, name=col_def)
-            return freq_grp
-        return df[col_def]
+            return freq_grp, code.format(col=col, freq=freq, col_def=col_def)
+        else:
+            return df[col_def], "\tdf['{col_def}'],".format(col_def=col_def)
     return _handler

@@ -101,12 +106,21 @@ def retrieve_chart_data(df, x, y, z, group=None):
     :type z: str
     :param group: column(s) to use for grouping
     :type group: list of str or str
-    :return: dataframe of data required for chart constructiuon
+    :return: dataframe of data required for chart construction
     :rtype: :class:`pandas:pandas.DataFrame`
     """
     freq_handler = date_freq_handler(df)
     cols = [x] + make_list(y) + [z] + make_list(group)
-    return pd.concat([freq_handler(c) for c in cols if c is not None], axis=1)
+    all_code = []
+    all_data = []
+    for col in cols:
+        if col is not None:
+            s, code = freq_handler(col)
+            all_data.append(s)
+            if code is not None:
+                all_code.append(code)
+    all_code = ["chart_data = pd.concat(["] + all_code + ["], axis=1)"]
+    return pd.concat(all_data, axis=1), all_code


 def check_all_nan(df, cols=None):
@@ -183,13 +197,29 @@ def build_agg_data(df, x, y, inputs, agg, z=None):
         window, comp = map(inputs.get, ['rolling_win', 'rolling_comp'])
         agg_df = df.set_index(x).rolling(window=window)
         agg_df = pd.DataFrame({c: getattr(agg_df[c], comp)() for c in y})
-        return agg_df.reset_index()
+        agg_df = agg_df.reset_index()
+        code = [
+            "chart_data = chart_data.set_index('{x}').rolling(window={window})".format(x=x, window=window),
+            "chart_data = pd.DataFrame({" + ', '.join(
+                ["'{c}': chart_data['{c}'].{comp}()".format(c=c, comp=comp) for c in y]
+            ) + '})',
+            "chart_data = chart_data.reset_index()"
+        ]
+        return agg_df, code

     if z_exists:
         groups = df.groupby([x] + make_list(y))
-        return getattr(groups[make_list(z)], agg)().reset_index()
+        return getattr(groups[make_list(z)], agg)().reset_index(), [
+            "chart_data = chart_data.groupby(['{cols}'])[['{z}']].{agg}().reset_index()".format(
+                cols="', '".join([x] + make_list(y)), z=z, agg=agg
+            )
+        ]
     groups = df.groupby(x)
-    return getattr(groups[y], agg)().reset_index()
+    return getattr(groups[y], agg)().reset_index(), [
+        "chart_data = chart_data.groupby('{x}')[['{y}']].{agg}().reset_index()".format(
+            x=x, y=y, agg=agg
+        )
+    ]


 def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
@@ -216,7 +246,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
     :return: dict
     """
-    data = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col)
+    data, code = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col)
     x_col = str('x')
     y_cols = make_list(y)
     z_col = kwargs.get('z')
@@ -225,11 +255,16 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
         z_cols = [z_col]
     if group_col is not None and len(group_col):
         data = data.sort_values(group_col + [x])
+        code.append("chart_data = chart_data.sort_values(['{cols}'])".format(cols="', '".join(group_col + [x])))
         check_all_nan(data, [x] + y_cols)
         data = data.rename(columns={x: x_col})
+        code.append("chart_data = chart_data.rename(columns={'" + x + "': '" + x_col + "'})")
         if agg is not None:
             data = data.groupby(group_col + [x_col])
             data = getattr(data, agg)().reset_index()
+            code.append("chart_data = chart_data.groupby(['{cols}']).{agg}().reset_index()".format(
+                cols="', '".join(group_col + [x_col]), agg=agg
+            ))
         max_groups = 15
         if len(data[group_col].drop_duplicates()) > max_groups:
             msg = (
@@ -239,6 +274,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
             raise Exception(msg)

         data = data.dropna()
+        code.append("chart_data = chart_data.dropna()")
         data_f, range_f = build_formatters(data)
         ret_data = dict(
             data={},
@@ -255,15 +291,19 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
             ])
             ret_data['data'][group_val] = data_f.format_lists(grp)
         ret_data['dtypes'] = {c: classify_type(dtype) for c, dtype in dtypes.items()}
-        return ret_data
+        return ret_data, code

     sort_cols = [x] + (y_cols if len(z_cols) else [])
     data = data.sort_values(sort_cols)
+    code.append("chart_data = chart_data.sort_values(['{cols}'])".format(cols="', '".join(sort_cols)))
     check_all_nan(data, [x] + y_cols + z_cols)
     y_cols = [str(y_col) for y_col in y_cols]
     data.columns = [x_col] + y_cols + z_cols
+    code.append("chart_data.columns = ['{cols}']".format(cols="', '".join([x_col] + y_cols + z_cols)))
     if agg is not None:
-        data = build_agg_data(data, x_col, y_cols, kwargs, agg, z=z_col)
+        data, agg_code = build_agg_data(data, x_col, y_cols, kwargs, agg, z=z_col)
+        code += agg_code
     data = data.dropna()
+    code.append("chart_data = chart_data.dropna()")

     dupe_cols = [x_col] + (y_cols if len(z_cols) else [])
     check_exceptions(data[dupe_cols].rename(columns={'x': x}), allow_duplicates,
@@ -274,7 +314,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
         min={col: fmt(data[col].min(), None) for _, col, fmt in range_f.fmts if col in [x_col] + y_cols + z_cols},
         max={col: fmt(data[col].max(), None) for _, col, fmt in range_f.fmts if col in [x_col] + y_cols + z_cols}
     )
-    return ret_data
+    return ret_data, code


 WEEKDAY_MAP = {idx: day for idx, day in enumerate(['Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'])}
diff --git a/dtale/cli/loaders/arctic_loader.py b/dtale/cli/loaders/arctic_loader.py
index 5854f123..f3c88802 100644
--- a/dtale/cli/loaders/arctic_loader.py
+++ b/dtale/cli/loaders/arctic_loader.py
@@ -1,8 +1,8 @@
-from builtins import map
 from logging import getLogger

 import pandas as pd

+from dtale.app import show
 from dtale.cli.clickutils import get_loader_options

 logger = getLogger(__name__)
@@ -22,6 +22,29 @@
 ]


+# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
+def show_loader(**kwargs):
+    return show(data_loader=lambda: loader_func(**kwargs), **kwargs)
+
+
+def loader_func(**kwargs):
+    try:
+        from arctic import Arctic
+        from arctic.store.versioned_item import VersionedItem
+    except ImportError:
+        raise ImportError('In order to use the arctic loader you must install arctic!')
+    host = Arctic(kwargs.get('host'))
+    lib = host.get_library(kwargs.get('library'))
+    read_kwargs = {}
+    start, end = (kwargs.get(p) for p in ['start', 'end'])
+    if start and end:
+        read_kwargs['chunk_range'] = pd.date_range(start, end)
+    data = lib.read(kwargs.get('node'), **read_kwargs)
+    if isinstance(data, VersionedItem):
+        data = data.data
+    return data
+
+
 # IMPORTANT!!! This function is required for building any customized CLI loader.
 def find_loader(kwargs):
     """
@@ -33,23 +56,9 @@ def find_loader(kwargs):
     """
     arctic_opts = get_loader_options(LOADER_KEY, kwargs)
     if len([f for f in arctic_opts.values() if f]):
-        try:
-            from arctic import Arctic
-            from arctic.store.versioned_item import VersionedItem
-        except ImportError:
-            raise ImportError('In order to use the --arctic loader you must install arctic!')

         def _arctic_loader():
-            host = Arctic(arctic_opts['host'])
-            lib = host.get_library(arctic_opts['library'])
-            read_kwargs = {}
-            start, end = map(arctic_opts.get, ['start', 'end'])
-            if start and end:
-                read_kwargs['chunk_range'] = pd.date_range(start, end)
-            data = lib.read(arctic_opts['node'], **read_kwargs)
-            if isinstance(data, VersionedItem):
-                data = data.data
-            return data
+            return loader_func(**arctic_opts)

         return _arctic_loader
     return None
diff --git a/dtale/cli/loaders/csv_loader.py b/dtale/cli/loaders/csv_loader.py
index 6228b18d..ed1275b5 100644
--- a/dtale/cli/loaders/csv_loader.py
+++ b/dtale/cli/loaders/csv_loader.py
@@ -1,5 +1,6 @@
 import pandas as pd

+from dtale.app import show
 from dtale.cli.clickutils import get_loader_options

 '''
@@ -13,6 +14,16 @@
 ]


+# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
+def show_loader(**kwargs):
+    return show(data_loader=lambda: loader_func(**kwargs), **kwargs)
+
+
+def loader_func(**kwargs):
+    path = kwargs.pop('path')
+    return pd.read_csv(path, **{k: v for k, v in kwargs.items() if k in LOADER_PROPS})
+
+
 # IMPORTANT!!! This function is required for building any customized CLI loader.
 def find_loader(kwargs):
     """
@@ -28,7 +39,7 @@ def _csv_loader():
             csv_arg_parsers = {  # TODO: add additional arg parsers
                 'parse_dates': lambda v: v.split(',') if v else None
             }
-            kwargs = {k: csv_arg_parsers.get(k, lambda v: v)(v) for k, v in csv_opts.items() if k != 'path'}
-            return pd.read_csv(csv_opts['path'], **kwargs)
+            kwargs = {k: csv_arg_parsers.get(k, lambda v: v)(v) for k, v in csv_opts.items()}
+            return loader_func(**kwargs)

         return _csv_loader
     return None
diff --git a/dtale/cli/loaders/json_loader.py b/dtale/cli/loaders/json_loader.py
index 4cb1d168..abdbba0c 100644
--- a/dtale/cli/loaders/json_loader.py
+++ b/dtale/cli/loaders/json_loader.py
@@ -1,5 +1,7 @@
 import pandas as pd
+import requests

+from dtale.app import show
 from dtale.cli.clickutils import get_loader_options

 '''
@@ -13,6 +15,26 @@
 ]


+# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
+def show_loader(**kwargs):
+    return show(data_loader=lambda: loader_func(**kwargs), **kwargs)
+
+
+def loader_func(**kwargs):
+    path = kwargs.pop('path')
+    normalize = kwargs.pop('normalize', False)
+    if path.startswith('http://') or path.startswith('https://'):  # add support for URLs
+        proxy = kwargs.pop('proxy', None)
+        req_kwargs = {}
+        if proxy is not None:
+            req_kwargs['proxies'] = dict(http=proxy, https=proxy)
+        resp = requests.get(path, **req_kwargs)
+        path = resp.json() if normalize else resp.text
+    if normalize:
+        return pd.io.json.json_normalize(path, **kwargs)
+    return pd.read_json(path, **{k: v for k, v in kwargs.items() if k in LOADER_PROPS})
+
+
 # IMPORTANT!!! This function is required for building any customized CLI loader.
 def find_loader(kwargs):
     """
@@ -28,7 +50,7 @@ def _json_loader():
             json_arg_parsers = {  # TODO: add additional arg parsers
                 'parse_dates': lambda v: v.split(',') if v else None
             }
-            kwargs = {k: json_arg_parsers.get(k, lambda v: v)(v) for k, v in json_opts.items() if k != 'path'}
-            return pd.read_json(json_opts['path'], **kwargs)
+            kwargs = {k: json_arg_parsers.get(k, lambda v: v)(v) for k, v in json_opts.items()}
+            return loader_func(**kwargs)

         return _json_loader
     return None
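The two `IMPORTANT!!!` comments above spell out the contract a loader module must satisfy. A minimal sketch of a hypothetical custom loader following the same pattern as `csv_loader.py` (the `parquet` name and props are assumptions for illustration; how a module gets registered into `dtale.cli.loaders.LOADERS`, which the new `dtale/__init__.py` loop consumes, is not shown in this diff):

```python
import pandas as pd

from dtale.app import show
from dtale.cli.clickutils import get_loader_options

LOADER_KEY = 'parquet'    # hypothetical: would surface as dtale.show_parquet
LOADER_PROPS = ['path']   # CLI options this loader understands


# required to expose this loader from the back-end as dtale.show_parquet
def show_loader(**kwargs):
    return show(data_loader=lambda: loader_func(**kwargs), **kwargs)


def loader_func(**kwargs):
    return pd.read_parquet(kwargs.pop('path'))


# required for building any customized CLI loader
def find_loader(kwargs):
    parquet_opts = get_loader_options(LOADER_KEY, kwargs)
    if len([f for f in parquet_opts.values() if f]):

        def _parquet_loader():
            return loader_func(**parquet_opts)

        return _parquet_loader
    return None
```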
diff --git a/dtale/column_builders.py b/dtale/column_builders.py
new file mode 100644
index 00000000..87cd8121
--- /dev/null
+++ b/dtale/column_builders.py
@@ -0,0 +1,131 @@
+import numpy as np
+import pandas as pd
+
+import dtale.global_state as global_state
+
+
+class ColumnBuilder(object):
+
+    def __init__(self, data_id, column_type, name, cfg):
+        self.data_id = data_id
+        if column_type == 'numeric':
+            self.builder = NumericColumnBuilder(name, cfg)
+        elif column_type == 'datetime':
+            self.builder = DatetimeColumnBuilder(name, cfg)
+        elif column_type == 'bins':
+            self.builder = BinsColumnBuilder(name, cfg)
+        else:
+            raise NotImplementedError('{} column builder not implemented yet!'.format(column_type))
+
+    def build_column(self):
+        data = global_state.get_data(self.data_id)
+        return self.builder.build_column(data)
+
+    def build_code(self):
+        return self.builder.build_code()
+
+
+class NumericColumnBuilder(object):
+
+    def __init__(self, name, cfg):
+        self.name = name
+        self.cfg = cfg
+
+    def build_column(self, data):
+        left, right, operation = (self.cfg.get(p) for p in ['left', 'right', 'operation'])
+        left = data[left['col']] if 'col' in left else float(left['val'])
+        right = data[right['col']] if 'col' in right else float(right['val'])
+        if operation == 'sum':
+            return left + right
+        if operation == 'difference':
+            return left - right
+        if operation == 'multiply':
+            return left * right
+        if operation == 'divide':
+            return left / right
+        return np.nan
+
+    def build_code(self):
+        left, right, operation = (self.cfg.get(p) for p in ['left', 'right', 'operation'])
+        operations = dict(sum='+', difference='-', multiply='*', divide='/')
+        return "df.loc[:, '{name}'] = {left} {operation} {right}".format(
+            name=self.name,
+            operation=operations.get(operation),
+            left="df['{}']".format(left['col']) if 'col' in left else left['val'],
+            right="df['{}']".format(right['col']) if 'col' in right else right['val']
+        )
+
+
+FREQ_MAPPING = dict(month='M', quarter='Q', year='Y')
+
+
+class DatetimeColumnBuilder(object):
+
+    def __init__(self, name, cfg):
+        self.name = name
+        self.cfg = cfg
+
+    def build_column(self, data):
+        col = self.cfg['col']
+        if 'property' in self.cfg:
+            return getattr(data[col].dt, self.cfg['property'])
+        conversion_key = self.cfg['conversion']
+        [freq, how] = conversion_key.split('_')
+        freq = FREQ_MAPPING[freq]
+        conversion_data = data[[col]].set_index(col).index.to_period(freq).to_timestamp(how=how).normalize()
+        return pd.Series(conversion_data, index=data.index, name=self.name)
+
+    def build_code(self):
+        if 'property' in self.cfg:
+            return "df.loc[:, '{name}'] = df['{col}'].dt.{property}".format(name=self.name, **self.cfg)
+        conversion_key = self.cfg['conversion']
+        [freq, how] = conversion_key.split('_')
+        freq = FREQ_MAPPING[freq]
+        return (
+            "{name}_data = df[['{col}']].set_index('{col}').index.to_period('{freq}')"
+            ".to_timestamp(how='{how}').normalize()\n"
+            "df.loc[:, '{name}'] = pd.Series({name}_data, index=df.index, name='{name}')"
+        ).format(name=self.name, col=self.cfg['col'], freq=freq, how=how)
+
+
+class BinsColumnBuilder(object):
+
+    def __init__(self, name, cfg):
+        self.name = name
+        self.cfg = cfg
+
+    def build_column(self, data):
+        col, operation, bins, labels = (self.cfg.get(p) for p in ['col', 'operation', 'bins', 'labels'])
+        bins = int(bins)
+        if operation == 'cut':
+            bin_data = pd.cut(data[col], bins=bins)
+        else:
+            bin_data = pd.qcut(data[col], q=bins)
+        if labels:
+            cats = {idx: str(cat) for idx, cat in enumerate(labels.split(','))}
+        else:
+            cats = {idx: str(cat) for idx, cat in enumerate(bin_data.cat.categories)}
+        return pd.Series(bin_data.cat.codes.map(cats), index=data.index, name=self.name)
+
+    def build_code(self):
+        col, operation, bins, labels = (self.cfg.get(p) for p in ['col', 'operation', 'bins', 'labels'])
+        bins_code = []
+        if operation == 'cut':
+            bins_code.append("{name}_data = pd.cut(df['{col}'], bins={bins})".format(
+                name=self.name, col=col, bins=bins
+            ))
+        else:
+            bins_code.append("{name}_data = pd.qcut(df['{col}'], q={bins})".format(
+                name=self.name, col=col, bins=bins
+            ))
+        if labels:
+            labels_str = ', '.join(["{}: '{}'".format(idx, cat) for idx, cat in enumerate(labels.split(','))])
+            labels_str = '{' + labels_str + '}'
+            bins_code.append('{name}_cats = {labels}'.format(name=self.name, labels=labels_str))
+        else:
+            bins_code.append(
+                '{name}_cats = {{idx: str(cat) for idx, cat in enumerate({name}_data.cat.categories)}}'.format(
+                    name=self.name
+                )
+            )
+        s_str = "df.loc[:, '{name}'] = pd.Series({name}_data.cat.codes.map({name}_cats), index=df.index, name='{name}')"
+        bins_code.append(s_str.format(name=self.name))
+        return '\n'.join(bins_code)
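A rough sketch of how these builders pair a computed column with its reproducible snippet (the `data_id` and config values are hypothetical; the data must already have been registered via `dtale.show` so `global_state.get_data` can find it):

```python
from dtale.column_builders import ColumnBuilder

# hypothetical: data_id '1' was loaded by dtale.show and has a numeric column 'a'
builder = ColumnBuilder('1', 'bins', 'a_bins', dict(col='a', operation='cut', bins='4', labels=None))
new_col = builder.build_column()  # pd.Series of bin labels aligned to the original index
snippet = builder.build_code()    # pandas code string appended to the instance's history
```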
className="hoverable__content copy-tt-bottom") + ], + className='hoverable-click' + ) + code_snippet = html.A( + [html.I(className='ico-code mr-4'), html.Span('Code Snippet')], + href='#', + className='code-snippet-btn', + ) + links = html.Div([popup_link, copy_link, code_snippet], style={'position': 'absolute', 'zIndex': 5}) def _chart_wrapper(chart): - return html.Div([popup_link, chart], style={'position': 'relative'}) + return html.Div([links, chart], style={'position': 'relative'}) return _chart_wrapper @@ -607,20 +624,21 @@ def heatmap_builder(data_id, **inputs): :return: heatmap :rtype: :plotly:`plotly.graph_objects.Heatmap ` """ - + code = None try: if not valid_chart(**inputs): - return None + return None, None raw_data = global_state.get_data(data_id) wrapper = chart_wrapper(data_id, raw_data, inputs) hm_kwargs = dict(hoverongaps=False, colorscale='Greens', showscale=True, hoverinfo='x+y+z') x, y, z, agg = (inputs.get(p) for p in ['x', 'y', 'z', 'agg']) y = y[0] - data = retrieve_chart_data(raw_data, x, y, z) + data, code = retrieve_chart_data(raw_data, x, y, z) x_title = update_label_for_freq(x) y_title = update_label_for_freq(y) z_title = z data = data.sort_values([x, y]) + code.append("chart_data = chart_data.sort_values(['{x}, '{y}'])".format(x=x, y=y)) check_all_nan(data) dupe_cols = [x, y] if agg is not None: @@ -629,17 +647,29 @@ def heatmap_builder(data_id, **inputs): data = data.dropna() data = data.set_index([x, y]).unstack().corr() data = data.stack().reset_index(0, drop=True) + code.append(( + "chart_data = chart_data.dropna()\n" + "chart_data = chart_data.set_index(['{x}', '{y}']).unstack().corr()\n" + "chart_data = chart_data.stack().reset_index(0, drop=True)" + ).format(x=x, y=y)) y_title = x_title dupe_cols = ['{}{}'.format(col, i) for i, col in enumerate(data.index.names)] [x, y] = dupe_cols data.index.names = dupe_cols data = data.reset_index() data.loc[data[x] == data[y], z] = np.nan + code.append(( + "chart_data.index.names = ['{x}', '{y}']\n" + "chart_data = chart_data.reset_index()\n" + "chart_data.loc[chart_data['{x}'] == chart_data['{y}'], '{z}'] = np.nan" + ).format(x=x, y=y, z=z)) + hm_kwargs = dict_merge( hm_kwargs, dict(colorscale=[[0, 'red'], [0.5, 'yellow'], [1.0, 'green']], zmin=-1, zmax=1) ) else: - data = build_agg_data(data, x, y, inputs, agg, z=z) + data, agg_code = build_agg_data(data, x, y, inputs, agg, z=z) + code += agg_code if not len(data): raise Exception('No data returned for this computation!') check_exceptions(data[dupe_cols], agg != 'corr', data_limit=40000, @@ -650,6 +680,11 @@ def heatmap_builder(data_id, **inputs): data = data.sort_values([x, y]) data = data.set_index([x, y]) data = data.unstack(0)[z] + code.append(( + "chart_data = data.sort_values(['{x}', '{y}'])\n" + "chart_data = chart_data.set_index(['{x}', '{y}'])\n" + "chart_data == unstack(0)['{z}']" + ).format(x=x, y=y, z=z)) x_data = weekday_tick_handler(data.columns, x) y_data = weekday_tick_handler(data.index.values, y) @@ -678,9 +713,9 @@ def heatmap_builder(data_id, **inputs): build_title(x, y, z=z, agg=agg) )) ) - )) + )), code except BaseException as e: - return build_error(str(e), str(traceback.format_exc())) + return build_error(str(e), str(traceback.format_exc())), code def build_figure_data(data_id, chart_type=None, query=None, x=None, y=None, z=None, group=None, agg=None, window=None, @@ -714,25 +749,27 @@ def build_figure_data(data_id, chart_type=None, query=None, x=None, y=None, z=No :return: dictionary of series data, min/max ranges of columns used in chart 
:rtype: dict """ + code = None try: if not valid_chart(**dict(x=x, y=y, z=z, chart_type=chart_type, agg=agg, window=window, rolling_comp=rolling_comp)): - return None + return None, None data = run_query( global_state.get_data(data_id), query, global_state.get_context_variables(data_id) ) + code = build_code_export(data_id, query=query) chart_kwargs = dict(group_col=group, agg=agg, allow_duplicates=chart_type == 'scatter', rolling_win=window, rolling_comp=rolling_comp) if chart_type in ZAXIS_CHARTS: chart_kwargs['z'] = z del chart_kwargs['group_col'] - data = build_chart_data(data, x, y, **chart_kwargs) - return data + data, chart_code = build_chart_data(data, x, y, **chart_kwargs) + return data, code + chart_code except BaseException as e: - return dict(error=str(e), traceback=str(traceback.format_exc())) + return dict(error=str(e), traceback=str(traceback.format_exc())), code def build_chart(data_id=None, **inputs): @@ -756,17 +793,19 @@ def build_chart(data_id=None, **inputs): :return: plotly chart object(s) :rtype: type of (:dash:`dash_core_components.Graph `, dict) """ + code = None try: if inputs.get('chart_type') == 'heatmap': - data = heatmap_builder(data_id, **inputs) - return data, None + data, code = heatmap_builder(data_id, **inputs) + return data, None, code - data = build_figure_data(data_id, **inputs) + data, code = build_figure_data(data_id, **inputs) if data is None: - return None, None + return None, None, None + code = '\n'.join(code or []) if 'error' in data: - return build_error(data['error'], data['traceback']), None + return build_error(data['error'], data['traceback']), None, code range_data = dict(min=data['min'], max=data['max']) axis_inputs = inputs.get('yaxis', {}) @@ -778,7 +817,8 @@ def build_chart(data_id=None, **inputs): if chart_type == 'wordcloud': return ( chart_builder(dash_components.Wordcloud(id='wc', data=data, y=y, group=inputs.get('group'))), - range_data + range_data, + code ) axes_builder = build_axes(data_id, x, axis_inputs, data['min'], data['max'], z=z, agg=agg) @@ -790,22 +830,22 @@ def build_chart(data_id=None, **inputs): ]) else: scatter_charts = scatter_builder(data, x, y, axes_builder, chart_builder, agg=agg) - return cpg_chunker(scatter_charts), range_data + return cpg_chunker(scatter_charts), range_data, code if chart_type == '3d_scatter': - return scatter_builder(data, x, y, axes_builder, chart_builder, z=z, agg=agg), range_data + return scatter_builder(data, x, y, axes_builder, chart_builder, z=z, agg=agg), range_data, code if chart_type == 'surface': - return surface_builder(data, x, y, z, axes_builder, chart_builder, agg=agg), range_data + return surface_builder(data, x, y, z, axes_builder, chart_builder, agg=agg), range_data, code if chart_type == 'bar': - return bar_builder(data, x, y, axes_builder, chart_builder, **chart_inputs), range_data + return bar_builder(data, x, y, axes_builder, chart_builder, **chart_inputs), range_data, code if chart_type == 'line': - return line_builder(data, x, y, axes_builder, chart_builder, **chart_inputs), range_data + return line_builder(data, x, y, axes_builder, chart_builder, **chart_inputs), range_data, code if chart_type == 'pie': - return pie_builder(data, x, y, chart_builder, **chart_inputs), range_data + return pie_builder(data, x, y, chart_builder, **chart_inputs), range_data, code raise NotImplementedError('chart type: {}'.format(chart_type)) except BaseException as e: - return build_error(str(e), str(traceback.format_exc())), None + return build_error(str(e), str(traceback.format_exc())), 
None, code diff --git a/dtale/dash_application/layout.py b/dtale/dash_application/layout.py index 4c9748f3..cfb10972 100644 --- a/dtale/dash_application/layout.py +++ b/dtale/dash_application/layout.py @@ -320,6 +320,7 @@ def charts_layout(df, settings, **inputs): dcc.Store(id='range-data'), dcc.Store(id='yaxis-data', data=inputs.get('yaxis')), dcc.Store(id='last-chart-input-data', data=inputs), + dcc.Input(id='chart-code', type='hidden'), html.Div(html.Div(dcc.Tabs( id='chart-tabs', value=chart_type or 'line', @@ -453,4 +454,5 @@ def charts_layout(df, settings, **inputs): className='row pt-3 pb-5 charts-filters' ), dcc.Loading(html.Div(id='chart-content'), type='circle'), + dcc.Textarea(id="copy-text", style=dict(position='absolute', left='-110%')) ], className='charts-body') diff --git a/dtale/dash_application/views.py b/dtale/dash_application/views.py index 6e9a3925..5ecb7560 100644 --- a/dtale/dash_application/views.py +++ b/dtale/dash_application/views.py @@ -33,7 +33,9 @@ def __init__(self, *args, **kwargs): kwargs['external_stylesheets'] = ['/css/main.css', '/css/dash.css'] if server.config['GITHUB_FORK']: kwargs['external_stylesheets'].append('/css/github_fork.css') - kwargs['external_scripts'] = ['/dash/components_bundle.js', '/dist/base_styles_bundle.js'] + kwargs['external_scripts'] = [ + '/dash/components_bundle.js', '/dash/custom_bundle.js', '/dist/base_styles_bundle.js' + ] super(DtaleDash, self).__init__(*args, **kwargs) @@ -243,7 +245,12 @@ def chart_input_data(cpg, barmode, barsort): return dict(cpg=cpg, barmode=barmode, barsort=barsort) @dash_app.callback( - [Output('chart-content', 'children'), Output('last-chart-input-data', 'data'), Output('range-data', 'data')], + [ + Output('chart-content', 'children'), + Output('last-chart-input-data', 'data'), + Output('range-data', 'data'), + Output('chart-code', 'value'), + ], # Since we use the data prop in an output, # we cannot get the initial data on load with the data prop. 
diff --git a/dtale/dash_application/views.py b/dtale/dash_application/views.py
index 6e9a3925..5ecb7560 100644
--- a/dtale/dash_application/views.py
+++ b/dtale/dash_application/views.py
@@ -33,7 +33,9 @@ def __init__(self, *args, **kwargs):
         kwargs['external_stylesheets'] = ['/css/main.css', '/css/dash.css']
         if server.config['GITHUB_FORK']:
             kwargs['external_stylesheets'].append('/css/github_fork.css')
-        kwargs['external_scripts'] = ['/dash/components_bundle.js', '/dist/base_styles_bundle.js']
+        kwargs['external_scripts'] = [
+            '/dash/components_bundle.js', '/dash/custom_bundle.js', '/dist/base_styles_bundle.js'
+        ]

         super(DtaleDash, self).__init__(*args, **kwargs)
@@ -243,7 +245,12 @@ def chart_input_data(cpg, barmode, barsort):
         return dict(cpg=cpg, barmode=barmode, barsort=barsort)

     @dash_app.callback(
-        [Output('chart-content', 'children'), Output('last-chart-input-data', 'data'), Output('range-data', 'data')],
+        [
+            Output('chart-content', 'children'),
+            Output('last-chart-input-data', 'data'),
+            Output('range-data', 'data'),
+            Output('chart-code', 'value'),
+        ],
         # Since we use the data prop in an output,
         # we cannot get the initial data on load with the data prop.
         # To counter this, you can use the modified_timestamp
@@ -270,8 +277,8 @@ def on_data(_ts1, _ts2, _ts3, pathname, inputs, chart_inputs, yaxis_data, last_chart_inputs):
         all_inputs = dict_merge(inputs, chart_inputs, dict(yaxis=yaxis_data or {}))
         if all_inputs == last_chart_inputs:
             raise PreventUpdate
-        charts, range_data = build_chart(get_data_id(pathname), **all_inputs)
-        return charts, all_inputs, range_data
+        charts, range_data, code = build_chart(get_data_id(pathname), **all_inputs)
+        return charts, all_inputs, range_data, code

     @dash_app.callback(
         [Output('yaxis-min-input', 'value'), Output('yaxis-max-input', 'value')],
diff --git a/dtale/global_state.py b/dtale/global_state.py
index bb230c7c..943a7592 100644
--- a/dtale/global_state.py
+++ b/dtale/global_state.py
@@ -4,6 +4,7 @@
 SETTINGS = {}
 METADATA = {}
 CONTEXT_VARIABLES = {}
+HISTORY = {}


 def get_data(data_id=None):
@@ -46,6 +47,14 @@ def get_context_variables(data_id=None):
     return CONTEXT_VARIABLES.get(data_id)


+def get_history(data_id=None):
+    global HISTORY
+
+    if data_id is None:
+        return HISTORY
+    return HISTORY.get(data_id)
+
+
 def set_data(data_id, val):
     global DATA

@@ -76,17 +85,28 @@ def set_context_variables(data_id, val):
     CONTEXT_VARIABLES[data_id] = val


-def cleanup():
+def set_history(data_id, val):
+    global HISTORY
+
+    HISTORY[data_id] = val
+
+
+def cleanup(data_id=None):
     """
     Helper function for cleaning up state related to a D-Tale process with a specific port

     :param port: integer string for a D-Tale process's port
     :type port: str
     """
-    global DATA, DTYPES, SETTINGS, METADATA, CONTEXT_VARIABLES
+    global DATA, DTYPES, SETTINGS, METADATA, CONTEXT_VARIABLES, HISTORY

-    DATA = {}
-    SETTINGS = {}
-    DTYPES = {}
-    METADATA = {}
-    CONTEXT_VARIABLES = {}
+    if data_id is None:
+        DATA = {}
+        SETTINGS = {}
+        DTYPES = {}
+        METADATA = {}
+        CONTEXT_VARIABLES = {}
+        HISTORY = {}
+    else:
+        for data in [DATA, DTYPES, SETTINGS, METADATA, CONTEXT_VARIABLES, HISTORY]:
+            data.pop(data_id, None)  # dict.pop with a default so that a KeyError won't occur
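The new `HISTORY` store and the `data_id`-aware `cleanup` follow the same access pattern as the existing stores; a small sketch (the `data_id` value is hypothetical):

```python
import dtale.global_state as global_state

global_state.set_history('1', ["df.loc[:, 'c'] = df['a'] + df['b']"])
global_state.get_history('1')  # -> ["df.loc[:, 'c'] = df['a'] + df['b']"]

global_state.cleanup('1')  # drop only data_id '1' from every store
global_state.cleanup()     # no data_id: reset every store entirely
```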
diff --git a/dtale/static/css/main.css b/dtale/static/css/main.css
index d8964f1c..0b56278b 100644
--- a/dtale/static/css/main.css
+++ b/dtale/static/css/main.css
@@ -10104,6 +10104,10 @@ select.form-control:focus,
   position: relative;
   border-bottom: dashed 1px #004c93;
 }
+.hoverable-click {
+  display: inline;
+  position: relative;
+}

 .hoverable:visited {
   color: #044c8f;
@@ -10126,7 +10130,8 @@ select.form-control:focus,
   display: block;
 }

-.hoverable .hoverable__content {
+.hoverable .hoverable__content,
+.hoverable-click .hoverable__content {
   display: none;
 }

@@ -10141,6 +10146,18 @@ select.form-control:focus,
   text-align: left;
   color: #404040;
 }
+.hoverable__content.copy-tt-bottom {
+  padding: .5em .5em;
+  width: 10em;
+  text-align: center;
+}
+.hoverable__content.copy-tt-top {
+  padding: .5em .5em;
+  width: 10em;
+  text-align: center;
+  top: unset;
+  bottom: 110%;
+}

 div.hoverable.label {
   border-bottom: none;
@@ -10163,6 +10180,15 @@ div.hoverable.label > div.hoverable__content {
   border-right: 0.5em solid transparent;
   border-left: 0.5em solid transparent;
 }
+.hoverable__content.copy-tt-top::before {
+  bottom: unset;
+  top: 95%;
+  -moz-transform: rotate(180deg);
+  -webkit-transform: rotate(180deg);
+  -ms-transform: rotate(180deg);
+  -o-transform: rotate(180deg);
+  transform: rotate(180deg);
+}

 .hoverable__content::after {
   position: absolute;
@@ -10177,6 +10203,15 @@ div.hoverable.label > div.hoverable__content {
   border-right: 0.5em solid transparent;
   border-left: 0.5em solid transparent;
 }
+.hoverable__content.copy-tt-top::after {
+  bottom: unset;
+  top: calc(92% + .1em);
+  -moz-transform: rotate(180deg);
+  -webkit-transform: rotate(180deg);
+  -ms-transform: rotate(180deg);
+  -o-transform: rotate(180deg);
+  transform: rotate(180deg);
+}

 .tooltip-wrapper {
   display: inline-block;
@@ -10443,6 +10478,12 @@ div.container-fluid.describe > div#popup-content > div.modal-body {
   height: 450px;
 }

+div.container-fluid.code-popup > div#popup-content > div.modal-footer {
+  position: absolute;
+  bottom: 0;
+  width: 100%;
+}
+
 @media (min-height: 330px) {
   div.container-fluid.build > div#popup-content > div.modal-footer {
     position: absolute;
diff --git a/dtale/templates/dtale/code_popup.html b/dtale/templates/dtale/code_popup.html
new file mode 100644
index 00000000..34437d3f
--- /dev/null
+++ b/dtale/templates/dtale/code_popup.html
@@ -0,0 +1,45 @@
+{# The HTML tags of this 45-line template were lost in extraction; only its text content survives. #}
+D-Tale Code Snippet
+{#
+  Despite the fact we reload these again later on, in order for the header to be rendered correctly
+  before the rest of the page is built we need to load them here as well.
+#}
+{% if config.GITHUB_FORK %}
+Fork me on GitHub
+{% endif %}
+D-TALE -
+{#
+  In order to get styles to load correctly, we need to reload these files.
+  In CSS, the last style declared takes precedence.
+#}
\ No newline at end of file
diff --git a/dtale/utils.py b/dtale/utils.py
index 970261c9..af5ebede 100644
--- a/dtale/utils.py
+++ b/dtale/utils.py
@@ -16,6 +16,8 @@
 from past.utils import old_div
 from six import PY3

+import dtale.global_state as global_state
+
 logger = getLogger(__name__)
@@ -732,7 +734,7 @@ def divide_chunks(l, n):
         yield l[i:i + n]


-def run_query(df, query, context_vars):
+def run_query(df, query, context_vars=None):
     """
     Utility function for running :func:`pandas:pandas.DataFrame.query` . This function contains extra logic to
     handle when column names contain special characters. Looks like pandas will be handling this in a future
@@ -745,7 +747,7 @@ def run_query(df, query, context_vars):
     :param query: query string
     :type query: str
     :param context_vars: dictionary of user-defined variables which can be referenced by name in query strings
-    :type context_vars: dict
+    :type context_vars: dict, optional
     :return: filtered dataframe
     """
     if (query or '') == '':
@@ -765,12 +767,12 @@ def run_query(df, query, context_vars):
             replacements[cn] = r

         inv_replacements = {replacements[k]: k for k in replacements.keys()}
-        df = df.rename(columns=replacements)  # Rename the columns
+        df = df.rename(columns=replacements)

-        df = df.query(final_query, local_dict=context_vars)  # Carry out query
+        df = df.query(final_query, local_dict=context_vars or {})
         df = df.rename(columns=inv_replacements)
     else:
-        df = df.query(query, local_dict=context_vars)
+        df = df.query(query, local_dict=context_vars or {})

     if not len(df):
         raise Exception('query "{}" found no data, please alter'.format(query))
@@ -778,6 +780,65 @@ def run_query(df, query, context_vars):


 class DuplicateDataError(Exception):
+    """
+    Exception for signalling that data which has already been loaded is being loaded to D-Tale again
+    """
     def __init__(self, data_id):
         super(DuplicateDataError, self).__init__("Duplicate Data")
         self.data_id = data_id
+
+
+def build_code_export(data_id, imports='import pandas as pd\n\n', query=None):
+    """
+    Helper function for building a string representing the code that was run to get the data you are viewing to that
+    point.
+
+    :param data_id: integer string identifier for a D-Tale process's data
+    :type data_id: str
+    :param imports: string representing the imports at the top of the code string
+    :type imports: string, optional
+    :param query: pandas dataframe query string
+    :type query: str, optional
+    :return: python code string
+    """
+    history = global_state.get_history(data_id) or []
+    settings = global_state.get_settings(data_id) or {}
+    ctxt_vars = global_state.get_context_variables(data_id)
+
+    startup_str = (
+        "# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'\n\n"
+        '{imports}'
+        'if isinstance(df, (pd.DatetimeIndex, pd.MultiIndex)):\n'
+        '\tdf = df.to_frame(index=False)\n\n'
+        '# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required\n'
+        "df = df.reset_index().drop('index', axis=1, errors='ignore')\n"
+        'df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers\n'
+    ).format(imports=imports)
+    final_history = [startup_str] + history
+    final_query = query
+    if final_query is None:
+        final_query = settings.get('query')
+
+    if final_query is not None:
+        if len(ctxt_vars or {}):
+            final_history.append((
+                "\n# this is injecting any context variables you may have passed into 'dtale.show'\n"
+                "import dtale.global_state as dtale_global_state\n"
+                "\n# DISCLAIMER: running this line in a different process than the one it originated in will\n"
+                "# produce differing results\n"
+                "ctxt_vars = dtale_global_state.get_context_variables('{data_id}')\n\n"
+                "df = df.query('{query}', local_dict=ctxt_vars)\n"
+            ).format(query=final_query, data_id=data_id))
+        else:
+            final_history.append("df = df.query('{}')\n".format(final_query))
+    elif 'query' in settings:
+        final_history.append("df = df.query('{}')\n".format(settings['query']))
+    if 'sort' in settings:
+        cols, dirs = [], []
+        for col, dir in settings['sort']:
+            cols.append(col)
+            dirs.append('True' if dir == 'ASC' else 'False')
+        final_history.append("df = df.sort_values(['{cols}'], ascending=[{dirs}])\n".format(
+            cols="', '".join(cols), dirs=', '.join(dirs)
+        ))
+    return final_history
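To make the shape of the export concrete, here is roughly what `build_code_export` would assemble for an instance whose settings hold a query and a sort (column names hypothetical; the leading block is `startup_str` verbatim, tabs included):

```python
# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'

import pandas as pd

if isinstance(df, (pd.DatetimeIndex, pd.MultiIndex)):
	df = df.to_frame(index=False)

# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required
df = df.reset_index().drop('index', axis=1, errors='ignore')
df.columns = [str(c) for c in df.columns]  # update columns to strings in case they are numbers
df = df.query('a > 1')
df = df.sort_values(['a', 'b'], ascending=[True, False])
```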
diff --git a/dtale/views.py b/dtale/views.py
index 3f43fe33..821ebff4 100644
--- a/dtale/views.py
+++ b/dtale/views.py
@@ -17,8 +17,10 @@
 from dtale import dtale
 from dtale.charts.utils import build_chart
 from dtale.cli.clickutils import retrieve_meta_info_and_version
-from dtale.utils import (DuplicateDataError, build_shutdown_url, classify_type,
-                         dict_merge, filter_df_for_grid, find_dtype,
+from dtale.column_builders import ColumnBuilder
+from dtale.utils import (DuplicateDataError, build_code_export,
+                         build_shutdown_url, classify_type, dict_merge,
+                         divide_chunks, filter_df_for_grid, find_dtype,
                          find_dtype_formatter, find_selected_column,
                          get_bool_arg, get_dtypes, get_int_arg, get_json_arg,
                          get_str_arg, grid_columns, grid_formatter, json_date,
@@ -32,10 +34,6 @@
 IDX_COL = str('dtale_index')


-def get_globals(key):
-    return globals().get(key)
-
-
 def head_data_id():
     data_keys = global_state.get_data().keys()
     if not len(data_keys):
@@ -523,7 +521,7 @@ def view_iframe(data_id=None):
 @dtale.route('/popup/<popup_type>/<data_id>')
 def view_popup(popup_type, data_id=None):
     """
-    :class:`flask:flask.Flask` route which serves up base jinja template for any popup, additionally forwards any
+    :class:`flask:flask.Flask` route which serves up a base jinja template for any popup, additionally forwards any
     request parameters as input to template.

     :param popup_type: type of popup to be opened. Possible values: charts, correlations, describe, histogram, instances
@@ -547,6 +545,16 @@ def pretty_print(obj):
     return base_render_template('dtale/popup.html', data_id, title=title, js_prefix=popup_type)


+@dtale.route('/code-popup')
+def view_code_popup():
+    """
+    :class:`flask:flask.Flask` route which serves up a base jinja template for code snippets
+
+    :return: HTML
+    """
+    return render_template('dtale/code_popup.html')
+
+
 @dtale.route('/processes')
 def get_processes():
     """
@@ -748,53 +756,8 @@ def build_column(data_id):
         col_type = get_str_arg(request, 'type')
         cfg = json.loads(get_str_arg(request, 'cfg'))

-        def _build_numeric(cfg):
-            left, right, operation = (cfg.get(p) for p in ['left', 'right', 'operation'])
-            left = data[left['col']] if 'col' in left else float(left['val'])
-            right = data[right['col']] if 'col' in right else float(right['val'])
-            if operation == 'sum':
-                return left + right
-            if operation == 'difference':
-                return left - right
-            if operation == 'multiply':
-                return left * right
-            if operation == 'divide':
-                return left / right
-            return np.nan
-
-        def _build_datetime(cfg):
-            col = cfg['col']
-            if 'property' in cfg:
-                return getattr(data[col].dt, cfg['property'])
-            conversion_key = cfg['conversion']
-            [freq, how] = conversion_key.split('_')
-            freq = dict(month='M', quarter='Q', year='Y')[freq]
-            conversion_data = data[[col]].set_index(col).index.to_period(freq).to_timestamp(how=how).normalize()
-            return pd.Series(conversion_data, index=data.index, name=name)
-
-        def _build_bins(cfg):
-            col, operation, bins, labels = (cfg.get(p) for p in ['col', 'operation', 'bins', 'labels'])
-            bins = int(bins)
-            if operation == 'cut':
-                bin_data = pd.cut(data[col], bins=bins)
-            else:
-                bin_data = pd.qcut(data[col], q=bins)
-            if labels:
-                cats = {idx: str(cat) for idx, cat in enumerate(labels.split(','))}
-            else:
-                cats = {idx: str(cat) for idx, cat in enumerate(bin_data.cat.categories)}
-            bin_data = pd.Series(bin_data.cat.codes.map(cats))
-            return bin_data
-
-        output = np.nan
-        if col_type == 'numeric':
-            output = _build_numeric(cfg)
-        elif col_type == 'datetime':
-            output = _build_datetime(cfg)
-        elif col_type == 'bins':
-            output = _build_bins(cfg)
-
-        data.loc[:, name] = output
+        builder = ColumnBuilder(data_id, col_type, name, cfg)
+        data.loc[:, name] = builder.build_column()
         dtype = find_dtype(data[name])
         data_ranges = {}
         if classify_type(dtype) == 'F' and not data[name].isnull().all():
@@ -803,7 +766,10 @@ def _build_bins(cfg):
         global_state.set_data(data_id, data)
         curr_dtypes = global_state.get_dtypes(data_id)
         curr_dtypes.append(dtype_f(len(curr_dtypes), name))
-        global_state.set_dtypes(curr_dtypes)
+        global_state.set_dtypes(data_id, curr_dtypes)
+        curr_history = global_state.get_history(data_id) or []
+        curr_history += [builder.build_code()]
+        global_state.set_history(data_id, curr_history)
         return jsonify(success=True)
     except BaseException as e:
         return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
@@ -863,13 +829,20 @@ def load_describe(column_series, additional_aggs=None):
     :return: JSON serializable dictionary of the output from calling :meth:`pandas:pandas.Series.describe`
     """
     desc = column_series.describe().to_frame().T
+    code = ["# main statistics", "stats = df['{col}'].describe().to_frame().T".format(col=column_series.name)]
     if additional_aggs:
         for agg in additional_aggs:
             if agg == 'mode':
                 mode = column_series.mode().values
                 desc['mode'] = np.nan if len(mode) > 1 else mode[0]
+                code.append((
+                    "# mode\n"
+                    "mode = df['{col}'].mode().values\n"
+                    "stats['mode'] = np.nan if len(mode) > 1 else mode[0]"
+                ).format(col=column_series.name))
                 continue
             desc[agg] = getattr(column_series, agg)()
+            code.append("# {agg}\nstats['{agg}'] = df['{col}'].{agg}()".format(col=column_series.name, agg=agg))
     desc_f_overrides = {
         'I': lambda f, i, c: f.add_int(i, c, as_string=True),
         'F': lambda f, i, c: f.add_float(i, c, precision=4, as_string=True),
@@ -879,7 +852,7 @@ def load_describe(column_series, additional_aggs=None):
     if 'count' in desc:
         # pandas always returns 'count' as a float and it adds useless decimal points
         desc['count'] = desc['count'].split('.')[0]
-    return desc
+    return desc, code


@dtale.route('/describe/<data_id>/<column>')
@@ -906,13 +879,16 @@ def describe(data_id, column):
         dtype = next((dtype_info['dtype'] for dtype_info in curr_dtypes if dtype_info['name'] == column), None)
         if classify_type(dtype) in ['I', 'F']:
             additional_aggs = ['sum', 'median', 'mode', 'var', 'sem', 'skew', 'kurt']
-        desc = load_describe(data[column], additional_aggs=additional_aggs)
+        code = build_code_export(data_id)
+        desc, desc_code = load_describe(data[column], additional_aggs=additional_aggs)
+        code += desc_code
         return_data = dict(describe=desc, success=True)
         uniq_vals = data[column].unique()
         if 'unique' not in return_data['describe']:
             return_data['describe']['unique'] = json_int(len(uniq_vals), as_string=True)
         uniq_f = find_dtype_formatter(get_dtypes(data)[column])
         if len(uniq_vals) <= 100:
+            code.append("uniq_vals = df['{}'].unique()".format(column))
             return_data['uniques'] = dict(
                 data=[uniq_f(u) for u in uniq_vals],
                 top=False
@@ -923,7 +899,9 @@ def describe(data_id, column):
                 data=[uniq_f(u) for u in uniq_vals],
                 top=True
             )
-
+            uniq_code = "uniq_vals = df['{}'].value_counts().sort_values(ascending=False).head(100).index.values"
+            code.append(uniq_code.format(column))
+        return_data['code'] = '\n'.join(code)
         return jsonify(return_data)
     except BaseException as e:
         return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
@@ -1018,9 +996,9 @@ def get_histogram(data_id):
     :param bins: the number of bins to display in your histogram, options on the front-end are 5, 10, 20, 50
     :returns: JSON {results: DATA, desc: output from pd.DataFrame[col].describe(), success: True/False}
     """
-    col = get_str_arg(request, 'col', 'values')
-    bins = get_int_arg(request, 'bins', 20)
     try:
+        col = get_str_arg(request, 'col', 'values')
+        bins = get_int_arg(request, 'bins', 20)
         data = run_query(
             global_state.get_data(data_id),
             get_str_arg(request, 'query'),
@@ -1030,8 +1008,13 @@ def get_histogram(data_id):
         data = data[~pd.isnull(data[selected_col])][[selected_col]]
         hist = np.histogram(data, bins=bins)

-        desc = load_describe(data[selected_col])
-        return jsonify(data=[json_float(h) for h in hist[0]], labels=['{0:.1f}'.format(l) for l in hist[1]], desc=desc)
+        desc, desc_code = load_describe(data[selected_col])
+        code = build_code_export(data_id, imports='import numpy as np\nimport pandas as pd\n\n')
+        code.append("hist = np.histogram(df[~pd.isnull(df['{col}'])][['{col}']], bins={bins})".format(
+            col=selected_col, bins=bins))
+        code += desc_code
+        return jsonify(data=[json_float(h) for h in hist[0]], labels=['{0:.1f}'.format(l) for l in hist[1]],
+                       desc=desc, code='\n'.join(code))
     except BaseException as e:
         return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
@@ -1078,17 +1061,29 @@ def get_correlations(data_id):
         if data[valid_corr_cols].isnull().values.any():
             data = data.corr(method='pearson')
+            code = build_code_export(data_id)
+            code.append("corr_data = df.corr(method='pearson')")
         else:
             # using pandas.corr proved to be quite slow on large datasets so I moved to numpy:
             # https://stackoverflow.com/questions/48270953/pandas-corr-and-corrwith-very-slow
             data = np.corrcoef(data[valid_corr_cols].values, rowvar=False)
             data = pd.DataFrame(data, columns=valid_corr_cols, index=valid_corr_cols)
-
+            code = build_code_export(data_id, imports='import numpy as np\nimport pandas as pd\n\n')
+            code.append((
+                "corr_cols = [\n"
+                "\t'{corr_cols}'\n"
+                "]\n"
+                "corr_data = np.corrcoef(df[corr_cols].values, rowvar=False)\n"
+                "corr_data = pd.DataFrame(corr_data, columns=corr_cols, index=corr_cols)"
+            ).format(corr_cols="'\n\t'".join(["', '".join(chunk) for chunk in divide_chunks(valid_corr_cols, 8)])))
+
+        code.append("corr_data.index.name = str('column')\ncorr_data = corr_data.reset_index()")
+        code = '\n'.join(code)
         data.index.name = str('column')
         data = data.reset_index()
         col_types = grid_columns(data)
         f = grid_formatter(col_types, nan_display=None)
-        return jsonify(data=f.format_dicts(data.itertuples()), dates=valid_date_cols, rolling=rolling)
+        return jsonify(data=f.format_dicts(data.itertuples()), dates=valid_date_cols, rolling=rolling, code=code)
     except BaseException as e:
         return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
@@ -1131,7 +1126,7 @@ def get_chart_data(data_id):
         allow_duplicates = get_bool_arg(request, 'allowDupes')
         window = get_int_arg(request, 'rollingWin')
         comp = get_str_arg(request, 'rollingComp')
-        data = build_chart(data, x, y, group_col, agg, allow_duplicates, rolling_win=window, rolling_comp=comp)
+        data, code = build_chart(data, x, y, group_col, agg, allow_duplicates, rolling_win=window, rolling_comp=comp)
         data['success'] = True
         return jsonify(data)
     except BaseException as e:
@@ -1161,22 +1156,37 @@ def get_correlations_ts(data_id):
         )
         cols = get_str_arg(request, 'cols')
         cols = json.loads(cols)
+        [col1, col2] = cols
         date_col = get_str_arg(request, 'dateCol')
         rolling_window = get_int_arg(request, 'rollingWindow')
+        code = build_code_export(data_id)
+
         if rolling_window:
-            [col1, col2] = list(set(cols))
             data = data[[date_col, col1, col2]].set_index(date_col)
             data = data[[col1, col2]].rolling(rolling_window).corr().reset_index()
             data = data.dropna()
             data = data[data['level_1'] == col1][[date_col, col2]]
+            code.append((
+                "corr_ts = df[['{date_col}', '{col1}', '{col2}']].set_index('{date_col}')\n"
+                "corr_ts = corr_ts[['{col1}', '{col2}']].rolling({rolling_window}).corr().reset_index()\n"
+                "corr_ts = corr_ts.dropna()\n"
+                "corr_ts = corr_ts[corr_ts['level_1'] == '{col1}'][['{date_col}', '{col2}']]"
+            ).format(col1=col1, col2=col2, date_col=date_col, rolling_window=rolling_window))
         else:
-            data = data.groupby(date_col)[list(set(cols))].corr(method='pearson')
+            data = data.groupby(date_col)[cols].corr(method='pearson')
             data.index.names = ['date', 'column']
             data = data.reset_index()
-            data = data[data.column == cols[0]][['date', cols[1]]]
+            data = data[data.column == col1][['date', col2]]
+            code.append((
+                "corr_ts = df.groupby('{date_col}')['{cols}'].corr(method='pearson')\n"
+                "corr_ts.index.names = ['date', 'column']\n"
+                "corr_ts = corr_ts.reset_index()\n"
+                "corr_ts = corr_ts[corr_ts.column == '{col1}'][['date', '{col2}']]\n"
+            ).format(col1=col1, col2=col2, date_col=date_col, cols="', '".join(cols)))
         data.columns = ['date', 'corr']
-        return_data = build_chart(data.fillna(0), 'date', 'corr')
+        code.append("corr_ts.columns = ['date', 'corr']")
+        return_data, _code = build_chart(data.fillna(0), 'date', 'corr')
         return_data['success'] = True
+        return_data['code'] = '\n'.join(code)
         return jsonify(return_data)
     except BaseException as e:
         return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
@@ -1207,11 +1217,12 @@ def get_scatter(data_id):
         y: col2
     } or {error: 'Exception message', traceback: 'Exception stacktrace'}
     """
-    cols = get_json_arg(request, 'cols')
-    date = get_str_arg(request, 'date')
-    date_col = get_str_arg(request, 'dateCol')
-    rolling = get_bool_arg(request, 'rolling')
     try:
+        cols = get_json_arg(request, 'cols')
+        date = get_str_arg(request, 'date')
+        date_col = get_str_arg(request, 'dateCol')
+        rolling = get_bool_arg(request, 'rolling')
+
         data = run_query(
             global_state.get_data(data_id),
             get_str_arg(request, 'query'),
@@ -1219,38 +1230,65 @@ def get_scatter(data_id):
         )
         idx_col = str('index')
         y_cols = [cols[1], idx_col]
+        code = build_code_export(data_id)
+
         if rolling:
             window = get_int_arg(request, 'window')
             idx = min(data[data[date_col] == date].index) + 1
             data = data.iloc[max(idx - window, 0):idx]
-            data = data[list(set(cols)) + [date_col]].dropna(how='any')
+            data = data[cols + [date_col]].dropna(how='any')
             y_cols.append(date_col)
+            code.append((
+                "idx = min(df[df['{date_col}'] == '{date}'].index) + 1\n"
+                "scatter_data = df.iloc[max(idx - {window}, 0):idx]\n"
+                "scatter_data = scatter_data[['{cols}']].dropna(how='any')"
+            ).format(
+                date_col=date_col, date=date, window=window, cols="', '".join(sorted(list(set(cols)) + [date_col]))
+            ))
         else:
             data = data[data[date_col] == date] if date else data
-            data = data[list(set(cols))].dropna(how='any')
+            data = data[cols].dropna(how='any')
+            code.append(("scatter_data = df[df['{date_col}'] == '{date}']" if date else 'scatter_data = df').format(
+                date_col=date_col, date=date
+            ))
+            code.append("scatter_data = scatter_data[['{cols}']].dropna(how='any')".format(
+                cols="', '".join(cols)
+            ))

         data[idx_col] = data.index
-        s0 = data[cols[0]]
-        s1 = data[cols[1]]
+        [col1, col2] = cols
+        s0 = data[col1]
+        s1 = data[col2]
         pearson = s0.corr(s1, method='pearson')
         spearman = s0.corr(s1, method='spearman')
         stats = dict(
             pearson='N/A' if pd.isnull(pearson) else pearson,
             spearman='N/A' if pd.isnull(spearman) else spearman,
             correlated=len(data),
-            only_in_s0=len(data[data[cols[0]].isnull()]),
-            only_in_s1=len(data[data[cols[1]].isnull()])
+            only_in_s0=len(data[data[col1].isnull()]),
+            only_in_s1=len(data[data[col2].isnull()])
         )
+        code.append((
+            "scatter_data['{idx_col}'] = scatter_data.index\n"
+            "s0 = scatter_data['{col1}']\n"
+            "s1 = scatter_data['{col2}']\n"
+            "pearson = s0.corr(s1, method='pearson')\n"
+            "spearman = s0.corr(s1, method='spearman')\n"
+            "only_in_s0 = len(scatter_data[scatter_data['{col1}'].isnull()])\n"
+            "only_in_s1 = len(scatter_data[scatter_data['{col2}'].isnull()])"
+        ).format(col1=col1, col2=col2, idx_col=idx_col))

         if len(data) > 15000:
             return jsonify(
                 stats=stats,
+                code='\n'.join(code),
                 error='Dataset exceeds 15,000 records, cannot render scatter. Please apply filter...'
             )
-        data = build_chart(data, cols[0], y_cols, allow_duplicates=True)
+        data, _code = build_chart(data, cols[0], y_cols, allow_duplicates=True)
         data['x'] = cols[0]
         data['y'] = cols[1]
         data['stats'] = stats
+        data['code'] = '\n'.join(code)
         return jsonify(data)
     except BaseException as e:
         return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
@@ -1301,3 +1339,12 @@ def value_as_str(value):
             success=True)
     except BaseException as e:
         return jsonify(error=str(e), traceback=str(traceback.format_exc()))
+
+
+@dtale.route('/code-export/<data_id>')
+def get_code_export(data_id):
+    try:
+        code = build_code_export(data_id)
+        return jsonify(code='\n'.join(code), success=True)
+    except BaseException as e:
+        return jsonify(error=str(e), traceback=str(traceback.format_exc()))
diff --git a/package.json b/package.json
index 442ff181..bd3f1dea 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "dtale",
-  "version": "1.7.9",
+  "version": "1.7.10",
   "description": "Visualizer for Pandas Data Structures",
   "main": "main.js",
   "directories": {
diff --git a/setup.py b/setup.py
index 3a3e2d9b..1df5407e 100644
--- a/setup.py
+++ b/setup.py
@@ -50,7 +50,7 @@ def run_tests(self):

 setup(
     name="dtale",
-    version="1.7.9",
+    version="1.7.10",
     author="MAN Alpha Technology",
     author_email="ManAlphaTech@man.com",
     description="Web Client for Visualizing Pandas Objects",
@@ -66,7 +66,7 @@ def run_tests(self):
         "Flask-Compress",
         "Flask",
         "flask-ngrok; python_version > '3.0'",
-        "future",
+        "future >= 0.14.0",
         "itsdangerous",
         "pandas",
         "requests",
diff --git a/static/CopyToClipboard.jsx b/static/CopyToClipboard.jsx
new file mode 100644
index 00000000..875e3464
--- /dev/null
+++ b/static/CopyToClipboard.jsx
@@ -0,0 +1,54 @@
+import $ from "jquery";
+import _ from "lodash";
+import PropTypes from "prop-types";
+import React from "react";
+
+function canCopy() {
+  return document.queryCommandSupported && document.queryCommandSupported("copy");
+}
+
+class CopyToClipboard extends React.Component {
+  constructor(props) {
+    super(props);
+  }
+
+  render() {
+    if (canCopy()) {
+      const copy = e => {
+        this.textArea.select();
+        document.execCommand("copy");
+        e.target.focus();
+        $(e.target)
+          .parent()
+          .parent()
+          .find(`div.copy-tt-${this.props.tooltipPosition}`)
+          .fadeIn(300)
+          .delay(300)
+          .fadeOut(400);
+      };
+      return [