1.8.2 (#138)

Merged 1 commit on Apr 2, 2020
.circleci/config.yml (2 changes: 1 addition, 1 deletion)
@@ -5,7 +5,7 @@ defaults: &defaults
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
CODECOV_TOKEN: b0d35139-0a75-427a-907b-2c78a762f8f0
VERSION: 1.8.1
VERSION: 1.8.2
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
steps:
- checkout
CHANGES.md (14 changes: 14 additions, 0 deletions)
@@ -1,5 +1,19 @@
## Changelog

### 1.8.2 (2020-4-1)
* [#129](https://github.com/man-group/dtale/issues/129), show dtype when hovering over header in "Highlight Dtypes" mode and description tooltips added to main menu
* made "No Aggregation" the default aggregation in charts
* bugfix for line charts with more than 15000 points
* updated "Value Counts" & "Category Breakdown" to return top on initial load
* [#118](https://github.com/man-group/dtale/issues/118), added scattergeo & choropleth maps
* [#121](https://github.com/man-group/dtale/issues/121), added "not equal" toggle to filters
* [#132](https://github.com/man-group/dtale/issues/132), updated resize button to "Refresh Widths"
* added "Animate" toggle to scatter, line & bar charts
* [#131](https://github.com/man-group/dtale/issues/131), changes to "Reshape Data" window
* [#130](https://github.com/man-group/dtale/issues/130), updates to pivot reshaper
* [#128](https://github.com/man-group/dtale/issues/128), additional hover display of code snippets for column creation
* [#112](https://github.com/man-group/dtale/issues/112), updated "Group" selection to give users the ability to select group values

### 1.8.1 (2020-3-29)
* [#92](https://github.com/man-group/dtale/issues/92), column builders for random data
* [#84](https://github.com/man-group/dtale/issues/84), highlight columns based on dtype
docker/2_7/Dockerfile (2 changes: 1 addition, 1 deletion)
@@ -44,4 +44,4 @@ WORKDIR /app

RUN set -eux \
; . /root/.bashrc \
; easy_install dtale-1.8.1-py2.7.egg
; easy_install dtale-1.8.2-py2.7.egg
docker/3_6/Dockerfile (2 changes: 1 addition, 1 deletion)
@@ -44,4 +44,4 @@ WORKDIR /app

RUN set -eux \
; . /root/.bashrc \
; easy_install dtale-1.8.1-py3.7.egg
; easy_install dtale-1.8.2-py3.7.egg
docs/source/conf.py (4 changes: 2 additions, 2 deletions)
@@ -64,9 +64,9 @@
# built documents.
#
# The short X.Y version.
version = u'1.8.1'
version = u'1.8.2'
# The full version, including alpha/beta/rc tags.
release = u'1.8.1'
release = u'1.8.2'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
dtale/charts/utils.py (89 changes: 58 additions, 31 deletions)
@@ -1,10 +1,12 @@
import pandas as pd

from dtale.utils import (classify_type, find_dtype_formatter, get_dtypes,
from dtale.utils import (ChartBuildingError, classify_type,
find_dtype_formatter, flatten_lists, get_dtypes,
grid_columns, grid_formatter, json_int, make_list)

YAXIS_CHARTS = ['line', 'bar', 'scatter']
ZAXIS_CHARTS = ['heatmap', '3d_scatter', 'surface']
MAX_GROUPS = 30


def valid_chart(chart_type=None, x=None, y=None, z=None, **inputs):
@@ -24,6 +26,14 @@ def valid_chart(chart_type=None, x=None, y=None, z=None, **inputs):
:return: `True` if executed from test, `False` otherwise
:rtype: bool
"""
if chart_type == 'maps':
map_type = inputs.get('map_type')
if map_type == 'choropleth' and all(inputs.get(p) is not None for p in ['loc_mode', 'loc', 'map_val']):
return True
elif map_type == 'scattergeo' and all(inputs.get(p) is not None for p in ['lat', 'lon', 'map_val']):
return True
return False

if x is None or not len(y or []):
return False
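For reference, a quick sketch of what the new `maps` branch accepts and rejects; the parameter names (`map_type`, `loc_mode`, `loc`, `map_val`, `lat`, `lon`) come from the diff above, while the argument values are purely illustrative:

```python
from dtale.charts.utils import valid_chart

valid_chart('maps', map_type='choropleth', loc_mode='ISO-3', loc='code', map_val='pop')  # True
valid_chart('maps', map_type='scattergeo', lat='lat', lon='lon', map_val='pop')          # True
valid_chart('maps', map_type='scattergeo', lat='lat', lon='lon')                         # False: map_val missing
```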

@@ -95,6 +105,8 @@ def group_filter_handler(col_def, group_val, group_classifier):
col_def_segs = col_def.split('|')
if len(col_def_segs) > 1:
col, freq = col_def_segs
if group_val == 'nan':
return '{col} != {col}'.format(col=col)
if freq == 'WD':
return '{}.dt.dayofweek == {}'.format(col, group_val)
elif freq == 'H2':
@@ -125,31 +137,27 @@
elif freq == 'Y':
ts_val = pd.Timestamp(group_val)
return "{col}.dt.year == {year}".format(col=col, year=ts_val.year)
if group_val == 'nan':
return '{col} != {col}'.format(col=col_def)
if group_classifier in ['I', 'F']:
return '{col} == {val}'.format(col=col_def, val=group_val)
return "{col} == '{val}'".format(col=col_def, val=group_val)


def retrieve_chart_data(df, x, y, z, group=None):
def retrieve_chart_data(df, *args, **kwargs):
"""
Retrieves data from a dataframe for x, y, z & group inputs complete with date frequency
formatting (:meth:`dtale.charts.utils.date_freq_handler`) if specified

:param df: dataframe that contains data for chart
:type df: :class:`pandas:pandas.DataFrame`
:param x: column to use for the X-Axis
:type x: str
:param y: columns to use for the Y-Axes
:type y: list of str
:param z: column to use for the Z-Axis
:type z: str
:param group: column(s) to use for grouping
:type group: list of str or str
:param args: columns to use
:type args: iterable of str
:return: dataframe of data required for chart construction
:rtype: :class:`pandas:pandas.DataFrame`
"""
freq_handler = date_freq_handler(df)
cols = [x] + make_list(y) + make_list(z) + make_list(group)
cols = flatten_lists([make_list(a) for a in args])
all_code = []
all_data = []
for col in cols:
@@ -158,8 +166,26 @@ def retrieve_chart_data(df, x, y, z, group=None):
all_data.append(s)
if code is not None:
all_code.append(code)
all_data = pd.concat(all_data, axis=1)
all_code = ["chart_data = pd.concat(["] + all_code + ["], axis=1)"]
return pd.concat(all_data, axis=1), all_code
if len(make_list(kwargs.get('group_val'))):
dtypes = get_dtypes(all_data)

def _group_filter(group_val):
for gc, gv in group_val.items():
classifier = classify_type(dtypes[gc])
yield group_filter_handler(gc, gv, classifier)

def _full_filter():
for group_val in kwargs['group_val']:
group_filter = ' and '.join(list(_group_filter(group_val)))
yield group_filter

filters = list(_full_filter())
filters = '({})'.format(') or ('.join(filters))
all_data = all_data.query(filters)
all_code.append('chart_data = chart_data.query({})'.format(filters))
return all_data, all_code
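A sketch of the reworked signature in use; the frame and column names here are hypothetical, and `group_val` follows the structure consumed above (a list of `{group_column: value}` dicts OR'd together into one query):

```python
import pandas as pd
from dtale.charts.utils import retrieve_chart_data

df = pd.DataFrame({
    'date': pd.date_range('2020-01-01', periods=4),
    'sector': ['tech', 'energy', 'tech', 'energy'],
    'price': [1.0, 2.0, 3.0, 4.0],
})
data, code = retrieve_chart_data(df, 'date', 'price', 'sector',
                                 group_val=[{'sector': 'tech'}])
# 'data' now holds only the 'tech' rows, and 'code' ends with a line
# resembling: chart_data = chart_data.query((sector == 'tech'))
```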


def check_all_nan(df, cols=None):
@@ -198,12 +224,12 @@ def check_exceptions(df, allow_duplicates, unlimited_data=False, data_limit=1500
:raises Exception: if any failure condition is met
"""
if not allow_duplicates and any(df.duplicated()):
raise Exception((
raise ChartBuildingError((
"{} contains duplicates, please specify group or additional filtering or select 'No Aggregation' from"
' Aggregation drop-down.'
).format(', '.join(df.columns)))
if not unlimited_data and len(df) > data_limit:
raise Exception(limit_msg.format(data_limit))
raise ChartBuildingError(limit_msg.format(data_limit))


def build_agg_data(df, x, y, inputs, agg, z=None):
@@ -264,8 +290,8 @@
]


def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, return_raw=False,
unlimited_data=False, **kwargs):
def build_base_chart(raw_data, x, y, group_col=None, group_val=None, agg=None, allow_duplicates=False, return_raw=False,
unlimited_data=False, **kwargs):
"""
Helper function to return data for 'chart-data' & 'correlations-ts' endpoints. Will return a dictionary of
dictionaries (one for each series) which contain the data for the x & y axes of the chart as well as the minimum &
@@ -289,7 +315,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
:return: dict
"""

data, code = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col)
data, code = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col, group_val=group_val)
x_col = str('x')
y_cols = make_list(y)
z_col = kwargs.get('z')
@@ -300,34 +326,28 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
check_all_nan(data, [x] + y_cols)
data = data.rename(columns={x: x_col})
code.append("chart_data = chart_data.rename(columns={'" + x + "': '" + x_col + "'})")
if agg is not None:
if agg is not None and agg != 'raw':
data = data.groupby(group_col + [x_col])
data = getattr(data, agg)().reset_index()
code.append("chart_data = chart_data.groupby(['{cols}']).{agg}().reset_index()".format(
cols="', '".join(group_col + [x]), agg=agg
))
max_groups = 30
MAX_GROUPS = 30
group_vals = data[group_col].drop_duplicates()
if len(group_vals) > max_groups:
if len(group_vals) > MAX_GROUPS:
dtypes = get_dtypes(group_vals)
group_fmt_overrides = {'I': lambda v, as_string: json_int(v, as_string=as_string, fmt='{}')}
group_fmts = {c: find_dtype_formatter(dtypes[c], overrides=group_fmt_overrides) for c in group_col}

def _group_filter():
for gv, gc in zip(group_vals.values[0], group_col):
classifier = classify_type(dtypes[gc])
yield group_filter_handler(gc, group_fmts[gc](gv, as_string=True), classifier)
group_filter = ' and '.join(list(_group_filter()))

group_f, _ = build_formatters(group_vals)
group_vals = group_f.format_lists(group_vals)
group_vals = pd.DataFrame(group_vals, columns=group_col)
msg = (
'Group ({}) contains more than {} unique values, please add additional filtering'
' or else chart will be unreadable. Additional filtering can be added above, for example:\n\n'
'{}\n\nHere are the values to choose from:\n\n{}'
).format(', '.join(group_col), max_groups, group_filter, group_vals.to_string(index=False))
raise Exception(msg)
'Group ({}) contains more than {} unique values, more groups than that will make the chart unreadable. '
'You can choose specific groups to display from the "Group(s)" dropdown above. The available group(s) '
'are listed below:'
).format(', '.join(group_col), MAX_GROUPS, group_vals.to_string(index=False))
raise ChartBuildingError(msg, group_vals.to_string(index=False))

data = data.dropna()
if return_raw:
@@ -402,3 +422,10 @@ def weekday_tick_handler(col_data, col):
if col.endswith('|WD'):
return [WEEKDAY_MAP[d] for d in col_data]
return col_data


def find_group_vals(df, group_cols):
group_vals, _ = retrieve_chart_data(df, group_cols)
group_vals = group_vals.drop_duplicates()
group_f, _ = build_formatters(group_vals)
return group_f.format_dicts(group_vals.itertuples())
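Putting the pieces above together: an over-large grouping now fails with `ChartBuildingError` (which carries the formatted group values as a second argument), and `find_group_vals` produces the values that presumably back the "Group(s)" dropdown named in the message. A sketch under those assumptions, with a hypothetical frame:

```python
import pandas as pd
from dtale.charts.utils import build_base_chart, find_group_vals
from dtale.utils import ChartBuildingError

df = pd.DataFrame({'x': range(100), 'y': range(100), 'g': range(100)})
try:
    build_base_chart(df, 'x', 'y', group_col=['g'])  # 100 groups > MAX_GROUPS (30)
except ChartBuildingError as ex:
    print(ex)  # the group-limit message shown above

find_group_vals(df, ['g'])[:3]  # e.g. [{'g': 0}, {'g': 1}, {'g': 2}]
```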
dtale/cli/loaders/csv_loader.py (17 changes: 17 additions, 0 deletions)
@@ -1,15 +1,24 @@
import pandas as pd
import requests
from six import PY3

from dtale.app import show
from dtale.cli.clickutils import get_loader_options

if PY3:
from io import StringIO
else:
from StringIO import StringIO


'''
IMPORTANT!!! These global variables are required for building any customized CLI loader.
When build_loaders runs startup it will search for any modules containing the global variable LOADER_KEY.
'''
LOADER_KEY = 'csv'
LOADER_PROPS = [
dict(name='path', help='path to CSV file'),
dict(name='proxy', help="proxy URL if you're passing in a URL for --csv-path"),
dict(name='parse_dates', help='comma-separated string of column names which should be parsed as dates')
]

@@ -21,6 +30,14 @@ def show_loader(**kwargs):

def loader_func(**kwargs):
path = kwargs.pop('path')
if path.startswith('http://') or path.startswith('https://'): # add support for URLs
proxy = kwargs.pop('proxy', None)
req_kwargs = {}
if proxy is not None:
req_kwargs['proxies'] = dict(http=proxy, https=proxy)
resp = requests.get(path, **req_kwargs)
assert resp.status_code == 200
path = StringIO(resp.text if PY3 else resp.content.decode('utf-8'))
return pd.read_csv(path, **{k: v for k, v in kwargs.items() if k in LOADER_PROPS})
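With the new URL branch, the CSV loader can read straight from an HTTP endpoint, mirroring the JSON loader's proxy handling further down. A hedged sketch of calling it directly (the URL and proxy are placeholders):

```python
from dtale.cli.loaders.csv_loader import loader_func

# Fetches the CSV over HTTP (optionally through a proxy), asserts a 200
# response, then parses the body with pandas.read_csv as shown above.
df = loader_func(path='https://example.com/data.csv',
                 proxy='http://proxy.example.com:8080')
```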


dtale/cli/loaders/json_loader.py (2 changes: 2 additions, 0 deletions)
@@ -12,6 +12,7 @@
LOADER_KEY = 'json'
LOADER_PROPS = [
dict(name='path', help='path to JSON file or URL to JSON endpoint'),
dict(name='proxy', help="proxy URL if you're passing in a URL for --json-path"),
dict(name='convert_dates', help='comma-separated string of column names which should be parsed as dates')
]

@@ -34,6 +35,7 @@ def loader_func(**kwargs):
if proxy is not None:
req_kwargs['proxies'] = dict(http=proxy, https=proxy)
resp = requests.get(path, **req_kwargs)
assert resp.status_code == 200
path = resp.json() if normalize else resp.text
if normalize:
normalize_func = pd.json_normalize if is_pandas1() else pd.io.json.json_normalize
dtale/column_filters.py (13 changes: 8 additions, 5 deletions)
@@ -56,13 +56,14 @@ def build_filter(self):
return super(StringFilter, self).handle_missing(None)

state = self.cfg.get('value', [])
operand = self.cfg.get('operand', '=')
fltr = dict(value=state)
if len(state) == 1:
val_str = ("'{}'" if self.classification == 'S' else '{}').format(state[0])
fltr['query'] = "{} == {}".format(self.column, val_str)
fltr['query'] = "{} {} {}".format(self.column, '==' if operand == '=' else '!=', val_str)
else:
val_str = ("'{}'".format("', '".join(state)) if self.classification == 'S' else ','.join(state))
fltr['query'] = "{} in ({})".format(self.column, val_str)
fltr['query'] = "{} {} ({})".format(self.column, 'in' if operand == '=' else 'not in', val_str)
return super(StringFilter, self).handle_missing(fltr)


@@ -76,15 +77,17 @@ def build_filter(self):
return super(NumericFilter, self).handle_missing(None)
cfg_val, cfg_operand, cfg_min, cfg_max = (self.cfg.get(p) for p in ['value', 'operand', 'min', 'max'])

if cfg_operand == '=':
if cfg_operand in ['=', 'ne']:
state = make_list(cfg_val or [])
if not len(state):
return super(NumericFilter, self).handle_missing(None)
fltr = dict(value=cfg_val, operand=cfg_operand)
if len(state) == 1:
fltr['query'] = "{} == {}".format(self.column, state[0])
fltr['query'] = "{} {} {}".format(self.column, '==' if cfg_operand == '=' else '!=', state[0])
else:
fltr['query'] = "{} in ({})".format(self.column, ", ".join(state))
fltr['query'] = "{} {} ({})".format(
self.column, 'in' if cfg_operand == '=' else 'not in', ", ".join(state)
)
return super(NumericFilter, self).handle_missing(fltr)
if cfg_operand in ['<', '>', '<=', '>=']:
if cfg_val is None:
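To make the new `'ne'` operand concrete, these are the query shapes the string and numeric filters now emit, checked directly against `pandas.DataFrame.query` (column names hypothetical):

```python
import pandas as pd

df = pd.DataFrame({'city': ['NYC', 'LA', 'Chicago'], 'n': [1, 2, 3]})

# StringFilter: operand '=' keeps matches, 'ne' inverts them.
df.query("city == 'NYC'")              # '=' with a single value
df.query("city != 'NYC'")              # 'ne' with a single value
df.query("city in ('NYC', 'LA')")      # '=' with multiple values
df.query("city not in ('NYC', 'LA')")  # 'ne' with multiple values

# NumericFilter emits the same shapes without quoting the values.
df.query("n != 2")
df.query("n not in (1, 3)")
```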