Skip to content

Commit

Permalink
code export functionality, additional updates:
Browse files Browse the repository at this point in the history
 - limited version of futures being used, #82
  • Loading branch information
Andrew Schonfeld committed Feb 25, 2020
1 parent 9ea2e29 commit 7f85f51
Show file tree
Hide file tree
Showing 8 changed files with 296 additions and 55 deletions.
131 changes: 131 additions & 0 deletions dtale/column_builders.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import numpy as np
import pandas as pd

import dtale.global_state as global_state


class ColumnBuilder(object):
    """Facade which dispatches column construction to a type-specific builder.

    :param data_id: identifier of the dataframe stored in dtale.global_state
    :param column_type: one of 'numeric', 'datetime' or 'bins'
    :param name: name of the column to be created
    :param cfg: type-specific configuration describing how to build the column
    :raises NotImplementedError: if column_type has no builder
    """

    def __init__(self, data_id, column_type, name, cfg):
        self.data_id = data_id
        # bug fix: 'datetime' and 'bins' previously instantiated
        # NumericColumnBuilder as well, so those column types could never work
        if column_type == 'numeric':
            self.builder = NumericColumnBuilder(name, cfg)
        elif column_type == 'datetime':
            self.builder = DatetimeColumnBuilder(name, cfg)
        elif column_type == 'bins':
            self.builder = BinsColumnBuilder(name, cfg)
        else:
            raise NotImplementedError('{} column builder not implemented yet!'.format(column_type))

    def build_column(self):
        """Load the dataframe for this data_id and build the new column values."""
        data = global_state.get_data(self.data_id)
        return self.builder.build_column(data)

    def build_code(self):
        """Return a python snippet (str) which reproduces the column build."""
        return self.builder.build_code()


class NumericColumnBuilder(object):
    """Builds a column from a binary arithmetic operation on two operands.

    Each operand in cfg ('left'/'right') is either {'col': column_name} or
    {'val': numeric_string}; cfg['operation'] is one of sum/difference/
    multiply/divide.
    """

    def __init__(self, name, cfg):
        self.name = name
        self.cfg = cfg

    @staticmethod
    def _operand(data, side):
        # a side resolves to a dataframe column or a float literal
        return data[side['col']] if 'col' in side else float(side['val'])

    def build_column(self, data):
        """Return the computed series (or np.nan for an unknown operation)."""
        cfg = self.cfg
        lhs = self._operand(data, cfg.get('left'))
        rhs = self._operand(data, cfg.get('right'))
        op = cfg.get('operation')
        if op == 'sum':
            return lhs + rhs
        elif op == 'difference':
            return lhs - rhs
        elif op == 'multiply':
            return lhs * rhs
        elif op == 'divide':
            return lhs / rhs
        return np.nan

    def build_code(self):
        """Return a python snippet (str) which reproduces this computation."""
        cfg = self.cfg
        symbol = dict(sum='+', difference='-', multiply='*', divide='/').get(cfg.get('operation'))

        def _code_operand(side):
            return "df['{}']".format(side['col']) if 'col' in side else side['val']

        return "df.loc[:, '{name}'] = {left} {operation} {right}".format(
            name=self.name,
            operation=symbol,
            left=_code_operand(cfg.get('left')),
            right=_code_operand(cfg.get('right'))
        )


FREQ_MAPPING = dict(month='M', quarter='Q', year='Y')


class DatetimeColumnBuilder(object):
    """Builds datetime-derived columns.

    cfg keys:
      - col: source datetime column
      - property (optional): attribute of the ``.dt`` accessor to extract
        (e.g. 'month', 'hour'); takes precedence when present
      - conversion: '<freq>_<how>' where freq is month/quarter/year and how is
        'start' or 'end' -> normalized period boundary timestamps
    """

    def __init__(self, name, cfg):
        self.name = name
        self.cfg = cfg

    def build_column(self, data):
        """Return the derived series for the configured property/conversion."""
        col = self.cfg['col']
        if 'property' in self.cfg:
            return getattr(data[col].dt, self.cfg['property'])
        conversion_key = self.cfg['conversion']
        freq, how = conversion_key.split('_')
        freq = FREQ_MAPPING[freq]
        conversion_data = data[[col]].set_index(col).index.to_period(freq).to_timestamp(how=how).normalize()
        return pd.Series(conversion_data, index=data.index, name=self.name)

    def build_code(self):
        """Return a python snippet (str) which reproduces this conversion."""
        if 'property' in self.cfg:
            return "df.loc[:, '{name}'] = df['{col}'].dt.{property}".format(name=self.name, **self.cfg)
        freq, how = self.cfg['conversion'].split('_')
        freq = FREQ_MAPPING[freq]
        # bug fixes: removed the stray quote after to_period('...') which made
        # the generated snippet a SyntaxError, and reference 'df' (the variable
        # name used in exported code) rather than 'data'
        return (
            "{name}_data = df[['{col}']].set_index('{col}').index.to_period('{freq}')"
            ".to_timestamp(how='{how}').normalize()\n"
            "df.loc[:, '{name}'] = pd.Series({name}_data, index=df.index, name='{name}')"
        ).format(name=self.name, col=self.cfg['col'], freq=freq, how=how)


class BinsColumnBuilder(object):
    """Builds a categorical column by binning a numeric column.

    cfg keys:
      - col: source numeric column
      - operation: 'cut' for equal-width bins, otherwise pd.qcut
        (equal-frequency) is used
      - bins: number of bins (int or numeric string)
      - labels (optional): comma-separated label per bin; defaults to the
        stringified interval categories
    """

    def __init__(self, name, cfg):
        self.name = name
        self.cfg = cfg

    def build_column(self, data):
        """Return a string series mapping each row to its bin label."""
        col, operation, bins, labels = (self.cfg.get(p) for p in ['col', 'operation', 'bins', 'labels'])
        bins = int(bins)
        if operation == 'cut':
            bin_data = pd.cut(data[col], bins=bins)
        else:
            bin_data = pd.qcut(data[col], q=bins)
        if labels:
            cats = {idx: str(cat) for idx, cat in enumerate(labels.split(','))}
        else:
            cats = {idx: str(cat) for idx, cat in enumerate(bin_data.cat.categories)}
        return pd.Series(bin_data.cat.codes.map(cats), index=data.index, name=self.name)

    def build_code(self):
        """Return a python snippet (str) which reproduces this binning."""
        col, operation, bins, labels = (self.cfg.get(p) for p in ['col', 'operation', 'bins', 'labels'])
        bins_code = []
        if operation == 'cut':
            bins_code.append("{name}_data = pd.cut(df['{col}'], bins={bins})".format(
                name=self.name, col=col, bins=bins
            ))
        else:
            # bug fix: pd.qcut's keyword is 'q', not 'bins'
            bins_code.append("{name}_data = pd.qcut(df['{col}'], q={bins})".format(
                name=self.name, col=col, bins=bins
            ))
        if labels:
            # bug fix: quote each label so the generated dict literal is valid python
            labels_str = ', '.join(["{}: '{}'".format(idx, cat) for idx, cat in enumerate(labels.split(','))])
            bins_code.append('{name}_cats = {{{labels}}}'.format(name=self.name, labels=labels_str))
        else:
            # bug fix: the original appended this template without calling
            # .format(), leaving literal '{name}' placeholders in exported code
            bins_code.append(
                '{name}_cats = {{idx: str(cat) for idx, cat in enumerate({name}_data.cat.categories)}}'.format(
                    name=self.name
                )
            )
        s_str = "df.loc[:, '{name}'] = pd.Series({name}_data.cat.codes.map({name}_cats), index=df.index, name='{name}')"
        bins_code.append(s_str.format(name=self.name))
        return '\n'.join(bins_code)
34 changes: 27 additions & 7 deletions dtale/global_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
SETTINGS = {}
METADATA = {}
CONTEXT_VARIABLES = {}
HISTORY = {}


def get_data(data_id=None):
Expand Down Expand Up @@ -46,6 +47,14 @@ def get_context_variables(data_id=None):
return CONTEXT_VARIABLES.get(data_id)


def get_history(data_id=None):
    """Return code-export history: the whole HISTORY mapping when data_id is
    None, otherwise the entry for that data_id (or None when absent)."""
    global HISTORY

    if data_id is None:
        return HISTORY
    return HISTORY.get(data_id)


def set_data(data_id, val):
global DATA

Expand Down Expand Up @@ -76,17 +85,28 @@ def set_context_variables(data_id, val):
CONTEXT_VARIABLES[data_id] = val


def cleanup():
def set_history(data_id, val):
    """Store the code-export history entries (val) for data_id."""
    global HISTORY

    HISTORY[data_id] = val


def cleanup(data_id=None):
"""
Helper function for cleanup up state related to a D-Tale process with a specific port
:param port: integer string for a D-Tale process's port
:type port: str
"""
global DATA, DTYPES, SETTINGS, METADATA, CONTEXT_VARIABLES
global DATA, DTYPES, SETTINGS, METADATA, CONTEXT_VARIABLES, HISTORY

DATA = {}
SETTINGS = {}
DTYPES = {}
METADATA = {}
CONTEXT_VARIABLES = {}
if data_id is None:
DATA = {}
SETTINGS = {}
DTYPES = {}
METADATA = {}
CONTEXT_VARIABLES = {}
HISTORY = {}
else:
for data in [DATA, DTYPES, SETTINGS, METADATA, CONTEXT_VARIABLES, HISTORY]:
data.pop(data_id, None) # use dict.pop with a default so that KeyError won't occur
83 changes: 36 additions & 47 deletions dtale/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from dtale import dtale
from dtale.charts.utils import build_chart
from dtale.cli.clickutils import retrieve_meta_info_and_version
from dtale.column_builders import ColumnBuilder
from dtale.utils import (DuplicateDataError, build_shutdown_url, classify_type,
dict_merge, filter_df_for_grid, find_dtype,
find_dtype_formatter, find_selected_column,
Expand Down Expand Up @@ -748,53 +749,8 @@ def build_column(data_id):
col_type = get_str_arg(request, 'type')
cfg = json.loads(get_str_arg(request, 'cfg'))

def _build_numeric(cfg):
left, right, operation = (cfg.get(p) for p in ['left', 'right', 'operation'])
left = data[left['col']] if 'col' in left else float(left['val'])
right = data[right['col']] if 'col' in right else float(right['val'])
if operation == 'sum':
return left + right
if operation == 'difference':
return left - right
if operation == 'multiply':
return left * right
if operation == 'divide':
return left / right
return np.nan

def _build_datetime(cfg):
col = cfg['col']
if 'property' in cfg:
return getattr(data[col].dt, cfg['property'])
conversion_key = cfg['conversion']
[freq, how] = conversion_key.split('_')
freq = dict(month='M', quarter='Q', year='Y')[freq]
conversion_data = data[[col]].set_index(col).index.to_period(freq).to_timestamp(how=how).normalize()
return pd.Series(conversion_data, index=data.index, name=name)

def _build_bins(cfg):
col, operation, bins, labels = (cfg.get(p) for p in ['col', 'operation', 'bins', 'labels'])
bins = int(bins)
if operation == 'cut':
bin_data = pd.cut(data[col], bins=bins)
else:
bin_data = pd.qcut(data[col], q=bins)
if labels:
cats = {idx: str(cat) for idx, cat in enumerate(labels.split(','))}
else:
cats = {idx: str(cat) for idx, cat in enumerate(bin_data.cat.categories)}
bin_data = pd.Series(bin_data.cat.codes.map(cats))
return bin_data

output = np.nan
if col_type == 'numeric':
output = _build_numeric(cfg)
elif col_type == 'datetime':
output = _build_datetime(cfg)
elif col_type == 'bins':
output = _build_bins(cfg)

data.loc[:, name] = output
builder = ColumnBuilder(data_id, col_type, name, cfg)
data.loc[:, name] = builder.build_column()
dtype = find_dtype(data[name])
data_ranges = {}
if classify_type(dtype) == 'F' and not data[name].isnull().all():
Expand All @@ -804,6 +760,9 @@ def _build_bins(cfg):
curr_dtypes = global_state.get_dtypes(data_id)
curr_dtypes.append(dtype_f(len(curr_dtypes), name))
global_state.set_dtypes(curr_dtypes)
curr_history = global_state.get_history(data_id)
curr_history += [builder.build_code()]
global_state.set_history(data_id, curr_history)
return jsonify(success=True)
except BaseException as e:
return jsonify(dict(error=str(e), traceback=str(traceback.format_exc())))
Expand Down Expand Up @@ -1301,3 +1260,33 @@ def value_as_str(value):
success=True)
except BaseException as e:
return jsonify(error=str(e), traceback=str(traceback.format_exc()))


@dtale.route('/code-export/<data_id>')
def get_code_export(data_id):
    """
    Flask route returning the python code (as a string) needed to reproduce
    the current state of the dataframe for data_id: startup boilerplate, any
    column-builder history and the current query/sort settings.

    :param data_id: integer string identifier for a D-Tale process's data
    :return: JSON {code: str, success: True} on success, otherwise
             {error: str, traceback: str}
    """
    try:
        history = global_state.get_history(data_id) or []
        settings = global_state.get_settings(data_id) or {}
        startup_str = (
            "# DISCLAIMER: 'df' refers to the data you passed in when calling 'dtale.show'\n\n"
            'import pandas as pd\n\n'
            'if isinstance(df, (pd.DatetimeIndex, pd.MultiIndex)):\n'
            '\tdf = df.to_frame(index=False)\n\n'
            '# remove any pre-existing indices for ease of use in the D-Tale code, but this is not required\n'
            "df = df.reset_index().drop('index', axis=1, errors='ignore')\n"
            'df.columns = [str(c) for c in df.columns] # update columns to strings in case they are numbers\n'
        )
        final_history = [startup_str] + history
        if 'query' in settings:
            final_history.append("df = df.query('{}')\n".format(settings['query']))
        if 'sort' in settings:
            cols, dirs = [], []
            for col, direction in settings['sort']:
                cols.append(col)
                dirs.append('True' if direction == 'ASC' else 'False')
            # bug fix: the join expressions for cols/dirs were swapped - column
            # names must be individually quoted ("', '") while the True/False
            # booleans must remain unquoted (', ')
            final_history.append("df = df.sort_values(['{cols}'], ascending=[{dirs}])\n".format(
                cols="', '".join(cols), dirs=', '.join(dirs)
            ))
        return jsonify(code='\n'.join(final_history), success=True)
    except BaseException as e:
        return jsonify(error=str(e), traceback=str(traceback.format_exc()))
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def run_tests(self):
"Flask-Compress",
"Flask",
"flask-ngrok; python_version > '3.0'",
"future",
"future >= 0.14.0",
"itsdangerous",
"pandas",
"requests",
Expand Down
8 changes: 8 additions & 0 deletions static/dtale/DataViewerMenu.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ class ReactDataViewerMenu extends React.Component {
</button>
</span>
</li>
<li>
<span className="toggler-action">
<button className="btn btn-plain" onClick={openPopup("code")}>
<i className="ico-code" />
<span className="font-weight-bold">Code Export</span>
</button>
</span>
</li>
<li>
<span className="toggler-action">
<button
Expand Down
4 changes: 4 additions & 0 deletions static/main.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { Provider } from "react-redux";
import actions from "./actions/dtale";
import "./adapter-for-react-16";
import { DataViewer } from "./dtale/DataViewer";
import { CodeExport } from "./popups/CodeExport";
import { ReactCorrelations as Correlations } from "./popups/Correlations";
import { ReactDescribe as Describe } from "./popups/Describe";
import { ReactHistogram as Histogram } from "./popups/Histogram";
Expand Down Expand Up @@ -41,6 +42,9 @@ if (_.startsWith(window.location.pathname, "/dtale/popup")) {
case "instances":
rootNode = <Instances dataId={dataId} iframe={true} />;
break;
case "code":
rootNode = <CodeExport dataId={dataId} />;
break;
case "charts":
default:
rootNode = <Charts {...{ dataId, chartData }} />;
Expand Down
Loading

0 comments on commit 7f85f51

Please sign in to comment.