1.8.2:
* #129, show dtype when hovering over a header in "Highlight Dtypes" mode; added description tooltips to the main menu
* made "No Aggregation" the default aggregation in charts
* bugfix for line charts with more than 15000 points
* updated "Value Counts" & "Category Breakdown" to return the top results on initial load
* #118, added scattergeo & choropleth maps
* #121, added "not equal" toggle to filters
* #132, updated resize button to "Refresh Widths"
* added "Animate" toggle to scatter, line & bar charts
* #131, changes to "Reshape Data" window
* #130, updates to pivot reshaper
* #128, additional hover display of code snippets for column creation
* #112, updated "Group" selection to give users the ability to select group values
Andrew Schonfeld committed Apr 2, 2020
1 parent 0e073eb commit 61f624d
Showing 47 changed files with 1,651 additions and 485 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -5,7 +5,7 @@ defaults: &defaults
       CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
       CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
       CODECOV_TOKEN: b0d35139-0a75-427a-907b-2c78a762f8f0
-      VERSION: 1.8.1
+      VERSION: 1.8.2
       PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
   steps:
     - checkout
2 changes: 1 addition & 1 deletion docker/2_7/Dockerfile
@@ -44,4 +44,4 @@ WORKDIR /app

 RUN set -eux \
     ; . /root/.bashrc \
-    ; easy_install dtale-1.8.1-py2.7.egg
+    ; easy_install dtale-1.8.2-py2.7.egg
2 changes: 1 addition & 1 deletion docker/3_6/Dockerfile
@@ -44,4 +44,4 @@ WORKDIR /app

 RUN set -eux \
     ; . /root/.bashrc \
-    ; easy_install dtale-1.8.1-py3.7.egg
+    ; easy_install dtale-1.8.2-py3.7.egg
4 changes: 2 additions & 2 deletions docs/source/conf.py
@@ -64,9 +64,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = u'1.8.1'
+version = u'1.8.2'
 # The full version, including alpha/beta/rc tags.
-release = u'1.8.1'
+release = u'1.8.2'

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
89 changes: 58 additions & 31 deletions dtale/charts/utils.py
@@ -1,10 +1,12 @@
 import pandas as pd

-from dtale.utils import (classify_type, find_dtype_formatter, get_dtypes,
+from dtale.utils import (ChartBuildingError, classify_type,
+                         find_dtype_formatter, flatten_lists, get_dtypes,
                          grid_columns, grid_formatter, json_int, make_list)

 YAXIS_CHARTS = ['line', 'bar', 'scatter']
 ZAXIS_CHARTS = ['heatmap', '3d_scatter', 'surface']
+MAX_GROUPS = 30


 def valid_chart(chart_type=None, x=None, y=None, z=None, **inputs):
@@ -24,6 +26,14 @@ def valid_chart(chart_type=None, x=None, y=None, z=None, **inputs):
     :return: `True` if executed from test, `False` otherwise
     :rtype: bool
     """
+    if chart_type == 'maps':
+        map_type = inputs.get('map_type')
+        if map_type == 'choropleth' and all(inputs.get(p) is not None for p in ['loc_mode', 'loc', 'map_val']):
+            return True
+        elif map_type == 'scattergeo' and all(inputs.get(p) is not None for p in ['lat', 'lon', 'map_val']):
+            return True
+        return False
+
     if x is None or not len(y or []):
         return False

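A quick illustration of the map validation added above (the column names are hypothetical): a choropleth needs loc_mode, loc and map_val, while a scattergeo needs lat, lon and map_val.

    # hedged usage sketch of the valid_chart() change above; column names are made up
    from dtale.charts.utils import valid_chart

    valid_chart(chart_type='maps', map_type='choropleth',
                loc_mode='ISO-3', loc='country_code', map_val='population')  # True
    valid_chart(chart_type='maps', map_type='scattergeo', lat='lat', lon='lon')  # False: map_val missing
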
@@ -95,6 +105,8 @@ def group_filter_handler(col_def, group_val, group_classifier):
     col_def_segs = col_def.split('|')
     if len(col_def_segs) > 1:
         col, freq = col_def_segs
+        if group_val == 'nan':
+            return '{col} != {col}'.format(col=col)
         if freq == 'WD':
             return '{}.dt.dayofweek == {}'.format(col, group_val)
         elif freq == 'H2':
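
The '{col} != {col}' query added above works because NaN is the only value that never compares equal to itself, so a self-inequality filter selects exactly the missing rows:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'a': [1.0, np.nan, 3.0]})
    df.query('a != a')  # only the NaN row, since NaN != NaN evaluates True
    df.query('a == a')  # the non-NaN rows
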
@@ -125,31 +137,27 @@ def group_filter_handler(col_def, group_val, group_classifier):
         elif freq == 'Y':
             ts_val = pd.Timestamp(group_val)
             return "{col}.dt.year == {year}".format(col=col, year=ts_val.year)
-    if group_val == 'nan':
-        return '{col} != {col}'.format(col=col_def)
     if group_classifier in ['I', 'F']:
         return '{col} == {val}'.format(col=col_def, val=group_val)
     return "{col} == '{val}'".format(col=col_def, val=group_val)


-def retrieve_chart_data(df, x, y, z, group=None):
+def retrieve_chart_data(df, *args, **kwargs):
     """
     Retrieves data from a dataframe for x, y, z & group inputs complete with date frequency
     formatting (:meth:`dtale.charts.utils.date_freq_handler`) if specified
     :param df: dataframe that contains data for chart
     :type df: :class:`pandas:pandas.DataFrame`
-    :param x: column to use for the X-Axis
-    :type x: str
-    :param y: columns to use for the Y-Axes
-    :type y: list of str
-    :param z: column to use for the Z-Axis
-    :type z: str
-    :param group: column(s) to use for grouping
-    :type group: list of str or str
+    :param args: columns to use
+    :type args: iterable of str
     :return: dataframe of data required for chart construction
     :rtype: :class:`pandas:pandas.DataFrame`
     """
     freq_handler = date_freq_handler(df)
-    cols = [x] + make_list(y) + make_list(z) + make_list(group)
+    cols = flatten_lists([make_list(a) for a in args])
     all_code = []
     all_data = []
     for col in cols:
@@ -158,8 +166,26 @@ def retrieve_chart_data(df, *args, **kwargs):
             all_data.append(s)
             if code is not None:
                 all_code.append(code)
-    return pd.concat(all_data, axis=1), all_code
+    all_data = pd.concat(all_data, axis=1)
+    all_code = ["chart_data = pd.concat(["] + all_code + ["], axis=1)"]
+    if len(make_list(kwargs.get('group_val'))):
+        dtypes = get_dtypes(all_data)
+
+        def _group_filter(group_val):
+            for gc, gv in group_val.items():
+                classifier = classify_type(dtypes[gc])
+                yield group_filter_handler(gc, gv, classifier)
+
+        def _full_filter():
+            for group_val in kwargs['group_val']:
+                group_filter = ' and '.join(list(_group_filter(group_val)))
+                yield group_filter
+
+        filters = list(_full_filter())
+        filters = '({})'.format(') or ('.join(filters))
+        all_data = all_data.query(filters)
+        all_code.append('chart_data = chart_data.query({})'.format(filters))
+    return all_data, all_code


 def check_all_nan(df, cols=None):
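
The new group_val handling builds one AND clause per selected group and ORs the clauses together before passing the result to DataFrame.query. A sketch with hypothetical columns and values:

    # group_val as it would arrive in kwargs (made-up selections)
    group_val = [{'sex': 'M', 'year': 2020}, {'sex': 'F', 'year': 2019}]
    # _group_filter turns each dict into:  sex == 'M' and year == 2020
    # _full_filter then ORs the clauses together:
    # (sex == 'M' and year == 2020) or (sex == 'F' and year == 2019)
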
@@ -198,12 +224,12 @@ def check_exceptions(df, allow_duplicates, unlimited_data=False, data_limit=1500
     :raises Exception: if any failure condition is met
     """
     if not allow_duplicates and any(df.duplicated()):
-        raise Exception((
+        raise ChartBuildingError((
             "{} contains duplicates, please specify group or additional filtering or select 'No Aggregation' from"
             ' Aggregation drop-down.'
         ).format(', '.join(df.columns)))
     if not unlimited_data and len(df) > data_limit:
-        raise Exception(limit_msg.format(data_limit))
+        raise ChartBuildingError(limit_msg.format(data_limit))


 def build_agg_data(df, x, y, inputs, agg, z=None):
@@ -264,8 +290,8 @@ def build_agg_data(df, x, y, inputs, agg, z=None):
     ]


-def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, return_raw=False,
-                unlimited_data=False, **kwargs):
+def build_base_chart(raw_data, x, y, group_col=None, group_val=None, agg=None, allow_duplicates=False, return_raw=False,
+                     unlimited_data=False, **kwargs):
     """
     Helper function to return data for 'chart-data' & 'correlations-ts' endpoints. Will return a dictionary of
     dictionaries (one for each series) which contain the data for the x & y axes of the chart as well as the minimum &
@@ -289,7 +315,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
     :return: dict
     """

-    data, code = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col)
+    data, code = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col, group_val=group_val)
     x_col = str('x')
     y_cols = make_list(y)
     z_col = kwargs.get('z')
@@ -300,34 +326,28 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
         check_all_nan(data, [x] + y_cols)
         data = data.rename(columns={x: x_col})
         code.append("chart_data = chart_data.rename(columns={'" + x + "': '" + x_col + "'})")
-        if agg is not None:
+        if agg is not None and agg != 'raw':
             data = data.groupby(group_col + [x_col])
             data = getattr(data, agg)().reset_index()
             code.append("chart_data = chart_data.groupby(['{cols}']).{agg}().reset_index()".format(
                 cols="', '".join(group_col + [x]), agg=agg
             ))
-        max_groups = 30
+        MAX_GROUPS = 30
         group_vals = data[group_col].drop_duplicates()
-        if len(group_vals) > max_groups:
+        if len(group_vals) > MAX_GROUPS:
             dtypes = get_dtypes(group_vals)
             group_fmt_overrides = {'I': lambda v, as_string: json_int(v, as_string=as_string, fmt='{}')}
             group_fmts = {c: find_dtype_formatter(dtypes[c], overrides=group_fmt_overrides) for c in group_col}
-
-            def _group_filter():
-                for gv, gc in zip(group_vals.values[0], group_col):
-                    classifier = classify_type(dtypes[gc])
-                    yield group_filter_handler(gc, group_fmts[gc](gv, as_string=True), classifier)
-            group_filter = ' and '.join(list(_group_filter()))

             group_f, _ = build_formatters(group_vals)
             group_vals = group_f.format_lists(group_vals)
             group_vals = pd.DataFrame(group_vals, columns=group_col)
             msg = (
-                'Group ({}) contains more than {} unique values, please add additional filtering'
-                ' or else chart will be unreadable. Additional filtering can be added above, for example:\n\n'
-                '{}\n\nHere are the values to choose from:\n\n{}'
-            ).format(', '.join(group_col), max_groups, group_filter, group_vals.to_string(index=False))
-            raise Exception(msg)
+                'Group ({}) contains more than {} unique values, more groups than that will make the chart unreadable. '
+                'You can choose specific groups to display from then "Group(s)" dropdown above. The available group(s) '
+                'are listed below:'
+            ).format(', '.join(group_col), MAX_GROUPS, group_vals.to_string(index=False))
+            raise ChartBuildingError(msg, group_vals.to_string(index=False))

         data = data.dropna()
         if return_raw:
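
The agg != 'raw' guard is what backs the new "No Aggregation" default: 'raw' charts the data as-is, while any other aggregation still routes through the groupby. For agg='mean' the generated snippet is equivalent to this sketch (the frame is made up):

    import pandas as pd

    df = pd.DataFrame({'group': ['a', 'a', 'b'], 'x': [1, 1, 2], 'y': [10.0, 20.0, 30.0]})
    df.groupby(['group', 'x']).mean().reset_index()  # y averaged per (group, x); skipped when agg='raw'
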
@@ -402,3 +422,10 @@ def weekday_tick_handler(col_data, col):
     if col.endswith('|WD'):
         return [WEEKDAY_MAP[d] for d in col_data]
     return col_data
+
+
+def find_group_vals(df, group_cols):
+    group_vals, _ = retrieve_chart_data(df, group_cols)
+    group_vals = group_vals.drop_duplicates()
+    group_f, _ = build_formatters(group_vals)
+    return group_f.format_dicts(group_vals.itertuples())
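
find_group_vals backs the new group-value picker (#112), returning the distinct group combinations as formatted dicts. A hedged usage sketch (the dataframe and 'sex' column are made up; the exact output ordering is an assumption):

    import pandas as pd
    from dtale.charts.utils import find_group_vals

    df = pd.DataFrame({'sex': ['M', 'F', 'M'], 'value': [1, 2, 3]})
    find_group_vals(df, ['sex'])  # e.g. [{'sex': 'M'}, {'sex': 'F'}]
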
17 changes: 17 additions & 0 deletions dtale/cli/loaders/csv_loader.py
@@ -1,15 +1,24 @@
 import pandas as pd
+import requests
+from six import PY3

 from dtale.app import show
 from dtale.cli.clickutils import get_loader_options

+if PY3:
+    from io import StringIO
+else:
+    from StringIO import StringIO
+
+
 '''
 IMPORTANT!!! These global variables are required for building any customized CLI loader.
 When build_loaders runs startup it will search for any modules containing the global variable LOADER_KEY.
 '''
 LOADER_KEY = 'csv'
 LOADER_PROPS = [
     dict(name='path', help='path to CSV file'),
+    dict(name='proxy', help="proxy URL if you're passing in a URL for --csv-path"),
     dict(name='parse_dates', help='comma-separated string of column names which should be parsed as dates')
 ]

@@ -21,6 +30,14 @@ def show_loader(**kwargs):

 def loader_func(**kwargs):
     path = kwargs.pop('path')
+    if path.startswith('http://') or path.startswith('https://'):  # add support for URLs
+        proxy = kwargs.pop('proxy', None)
+        req_kwargs = {}
+        if proxy is not None:
+            req_kwargs['proxies'] = dict(http=proxy, https=proxy)
+        resp = requests.get(path, **req_kwargs)
+        assert resp.status_code == 200
+        path = StringIO(resp.content if PY3 else resp.content.decode('utf-8'))
     return pd.read_csv(path, **{k: v for k, v in kwargs.items() if k in LOADER_PROPS})
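
With this change the CSV loader accepts URLs, fetching the file (optionally through a proxy) and handing the body to pandas via StringIO. The round trip is roughly equivalent to this sketch (the URL and proxy are hypothetical):

    import pandas as pd
    import requests
    from io import StringIO  # Python 3

    resp = requests.get('https://example.com/data.csv',
                        proxies=dict(http='http://proxy:8080', https='http://proxy:8080'))
    assert resp.status_code == 200
    df = pd.read_csv(StringIO(resp.text))
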
2 changes: 2 additions & 0 deletions dtale/cli/loaders/json_loader.py
@@ -12,6 +12,7 @@
 LOADER_KEY = 'json'
 LOADER_PROPS = [
     dict(name='path', help='path to JSON file or URL to JSON endpoint'),
+    dict(name='proxy', help="proxy URL if you're passing in a URL for --json-path"),
     dict(name='convert_dates', help='comma-separated string of column names which should be parsed as dates')
 ]
@@ -34,6 +35,7 @@ def loader_func(**kwargs):
     if proxy is not None:
         req_kwargs['proxies'] = dict(http=proxy, https=proxy)
     resp = requests.get(path, **req_kwargs)
+    assert resp.status_code == 200
     path = resp.json() if normalize else resp.text
     if normalize:
         normalize_func = pd.json_normalize if is_pandas1() else pd.io.json.json_normalize
13 changes: 8 additions & 5 deletions dtale/column_filters.py
@@ -56,13 +56,14 @@ def build_filter(self):
             return super(StringFilter, self).handle_missing(None)

         state = self.cfg.get('value', [])
+        operand = self.cfg.get('operand', '=')
         fltr = dict(value=state)
         if len(state) == 1:
             val_str = ("'{}'" if self.classification == 'S' else '{}').format(state[0])
-            fltr['query'] = "{} == {}".format(self.column, val_str)
+            fltr['query'] = "{} {} {}".format(self.column, '==' if operand == '=' else '!=', val_str)
         else:
             val_str = ("'{}'".format("', '".join(state)) if self.classification == 'S' else ','.join(state))
-            fltr['query'] = "{} in ({})".format(self.column, val_str)
+            fltr['query'] = "{} {} ({})".format(self.column, 'in' if operand == '=' else 'not in', val_str)
         return super(StringFilter, self).handle_missing(fltr)


@@ -76,15 +77,17 @@ def build_filter(self):
             return super(NumericFilter, self).handle_missing(None)
         cfg_val, cfg_operand, cfg_min, cfg_max = (self.cfg.get(p) for p in ['value', 'operand', 'min', 'max'])

-        if cfg_operand == '=':
+        if cfg_operand in ['=', 'ne']:
             state = make_list(cfg_val or [])
             if not len(state):
                 return super(NumericFilter, self).handle_missing(None)
             fltr = dict(value=cfg_val, operand=cfg_operand)
             if len(state) == 1:
-                fltr['query'] = "{} == {}".format(self.column, state[0])
+                fltr['query'] = "{} {} {}".format(self.column, '==' if cfg_operand == '=' else '!=', state[0])
             else:
-                fltr['query'] = "{} in ({})".format(self.column, ", ".join(state))
+                fltr['query'] = "{} {} ({})".format(
+                    self.column, 'in' if cfg_operand == '=' else 'not in', ", ".join(state)
+                )
             return super(NumericFilter, self).handle_missing(fltr)
         if cfg_operand in ['<', '>', '<=', '>=']:
             if cfg_val is None:
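
The new operand toggles (#121) flip the generated pandas queries from equality to negation. The query strings produced look like these (column names and values are hypothetical):

    import pandas as pd

    df = pd.DataFrame({'state': ['CA', 'NY', 'TX'], 'year': [2019, 2020, 2021]})
    df.query("state != 'CA'")              # StringFilter, single value, "not equal" toggled
    df.query("state not in ('CA', 'NY')")  # StringFilter, multiple values
    df.query("year != 2020")               # NumericFilter, operand 'ne'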