1.7.10
 * #75, added code snippet functionality to the following:
   * main grid, histogram, correlations, column building & charts
 * exposed CLI loaders through the following functions: dtale.show_csv, dtale.show_json, dtale.show_arctic
   * built in such a way that custom loaders can easily be exposed as well
 * #82, pinned `future` package to be >= 0.14.0
Andrew Schonfeld authored and aschonfeld committed Feb 27, 2020
1 parent 9ea2e29 commit 84f0021
Showing 57 changed files with 1,538 additions and 270 deletions.
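Before the file-by-file diff, a quick usage sketch of the newly exposed loaders (hypothetical file and column names; assumes `parse_dates` is among the CSV loader's supported properties):

```python
import dtale

# kwargs are forwarded to the loader's loader_func and then on to dtale.app.show
dtale.show_csv(path='data.csv', parse_dates=['date'])
```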
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -5,7 +5,7 @@ defaults: &defaults
CIRCLE_ARTIFACTS: /tmp/circleci-artifacts
CIRCLE_TEST_REPORTS: /tmp/circleci-test-results
CODECOV_TOKEN: b0d35139-0a75-427a-907b-2c78a762f8f0
-VERSION: 1.7.9
+VERSION: 1.7.10
PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases
steps:
- checkout
7 changes: 7 additions & 0 deletions CHANGES.md
@@ -1,5 +1,12 @@
## Changelog

### 1.7.10 (2020-2-27)
* [#75](https://github.com/man-group/dtale/issues/75), added code snippet functionality to the following:
  * main grid, histogram, correlations, column building & charts
* exposed CLI loaders through the following functions: dtale.show_csv, dtale.show_json, dtale.show_arctic
  * built in such a way that custom loaders can easily be exposed as well
* [#82](https://github.com/man-group/dtale/issues/82), pinned `future` package to be >= 0.14.0

### 1.7.9 (2020-2-24)
* support for google colab
* bugfixes: [#71](https://github.com/man-group/dtale/issues/71), [#72](https://github.com/man-group/dtale/issues/72), [#73](https://github.com/man-group/dtale/issues/73)
2 changes: 1 addition & 1 deletion docker/2_7/Dockerfile
@@ -44,4 +44,4 @@ WORKDIR /app

RUN set -eux \
; . /root/.bashrc \
-; easy_install dtale-1.7.9-py2.7.egg
+; easy_install dtale-1.7.10-py2.7.egg
2 changes: 1 addition & 1 deletion docker/3_6/Dockerfile
@@ -44,4 +44,4 @@ WORKDIR /app

RUN set -eux \
; . /root/.bashrc \
-; easy_install dtale-1.7.9-py3.7.egg
+; easy_install dtale-1.7.10-py3.7.egg
4 changes: 2 additions & 2 deletions docs/source/conf.py
@@ -64,9 +64,9 @@
# built documents.
#
# The short X.Y version.
-version = u'1.7.9'
+version = u'1.7.10'
# The full version, including alpha/beta/rc tags.
-release = u'1.7.9'
+release = u'1.7.10'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
5 changes: 5 additions & 0 deletions dtale/__init__.py
@@ -4,3 +4,8 @@

# flake8: NOQA
from dtale.app import show, get_instance, instances # isort:skip
from dtale.cli.loaders import LOADERS # isort:skip

for loader_name, loader in LOADERS.items():
if hasattr(loader, 'show_loader'):
globals()['show_{}'.format(loader_name)] = loader.show_loader
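The loop above exposes every CLI loader that defines a `show_loader` function as a top-level `dtale.show_<name>` entry point. As a hedged sketch, a custom loader would only need to follow the same contract, assuming it is registered in `LOADERS` (whose population is not shown in this diff); all names below are hypothetical:

```python
# hypothetical module: dtale/cli/loaders/testdata_loader.py
import pandas as pd

from dtale.app import show

LOADER_KEY = 'testdata'


def loader_func(**kwargs):
    # a real loader would read from its data source here
    return pd.DataFrame(dict(x=range(int(kwargs.get('rows', 100)))))


# the presence of show_loader is what makes dtale.show_testdata appear
def show_loader(**kwargs):
    return show(data_loader=lambda: loader_func(**kwargs), **kwargs)
```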
2 changes: 1 addition & 1 deletion dtale/app.py
@@ -37,7 +37,7 @@
ACTIVE_HOST = None
ACTIVE_PORT = None

-SHORT_LIFE_PATHS = ['dist']
+SHORT_LIFE_PATHS = ['dist', 'dash']
SHORT_LIFE_TIMEOUT = 60

REAPER_TIMEOUT = 60.0 * 60.0 # one-hour
62 changes: 51 additions & 11 deletions dtale/charts/utils.py
@@ -75,14 +75,19 @@ def _handler(col_def):
if len(col_def_segs) > 1 and classify_type(dtypes[col_def_segs[0]]) == 'D':
col, freq = col_def_segs
if freq == 'WD':
code = "df.set_index('{col}').index.dayofweek.values"
freq_grp = df.set_index(col).index.dayofweek.values
elif freq == 'H2':
code = "df.set_index('{col}').index.hour.values"
freq_grp = df.set_index(col).index.hour.values
else:
code = "df.set_index('{col}').index.to_period('{freq}').to_timestamp(how='end').values"
freq_grp = df.set_index(col).index.to_period(freq).to_timestamp(how='end').values
code = "\tpd.Series(" + code + ", index=df.index, name='{col_def}'),"
freq_grp = pd.Series(freq_grp, index=orig_idx, name=col_def)
return freq_grp
return df[col_def]
return freq_grp, code.format(col=col, freq=freq, col_def=col_def)
else:
return df[col_def], "\tdf['{col_def}'],".format(col_def=col_def)
return _handler
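The handler now returns each series alongside the pandas snippet that reproduces it. A small sketch of the round trip, assuming a `col|freq` naming convention for the split above (the separator itself sits outside this hunk) and that `date_freq_handler` is importable from this module:

```python
import pandas as pd

from dtale.charts.utils import date_freq_handler

df = pd.DataFrame({'date': pd.date_range('2020-01-01', periods=5)})
series, code = date_freq_handler(df)('date|WD')
# code -> "\tpd.Series(df.set_index('date').index.dayofweek.values, index=df.index, name='date|WD'),"
```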


@@ -101,12 +106,21 @@ def retrieve_chart_data(df, x, y, z, group=None):
:type z: str
:param group: column(s) to use for grouping
:type group: list of str or str
:return: dataframe of data required for chart constructiuon
:return: dataframe of data required for chart construction
:rtype: :class:`pandas:pandas.DataFrame`
"""
freq_handler = date_freq_handler(df)
cols = [x] + make_list(y) + [z] + make_list(group)
return pd.concat([freq_handler(c) for c in cols if c is not None], axis=1)
all_code = []
all_data = []
for col in cols:
if col is not None:
s, code = freq_handler(col)
all_data.append(s)
if code is not None:
all_code.append(code)
all_code = ["chart_data = pd.concat(["] + all_code + ["], axis=1)"]
return pd.concat(all_data, axis=1), all_code
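The per-column snippets are wrapped so that joining `all_code` with newlines yields a runnable reconstruction of the concat. An illustration of the generated output for two plain (non-date) columns, with hypothetical names:

```python
# hypothetical result of '\n'.join(all_code) for x='a', y=['b']
chart_data = pd.concat([
	df['a'],
	df['b'],
], axis=1)
```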


def check_all_nan(df, cols=None):
@@ -183,13 +197,29 @@ def build_agg_data(df, x, y, inputs, agg, z=None):
window, comp = map(inputs.get, ['rolling_win', 'rolling_comp'])
agg_df = df.set_index(x).rolling(window=window)
agg_df = pd.DataFrame({c: getattr(agg_df[c], comp)() for c in y})
return agg_df.reset_index()
agg_df = agg_df.reset_index()
code = [
"chart_data = chart_data.set_index('{x}').rolling(window={window})".format(x=x, window=window),
"chart_data = pd.DataFrame({'" + ', '.join(
["'{c}': chart_data['{c}'].{comp}()".format(c=c, comp=comp) for c in y]
) + '})',
"chart_data = chart_data.reset_index()"
]
return agg_df, code

if z_exists:
groups = df.groupby([x] + make_list(y))
return getattr(groups[make_list(z)], agg)().reset_index()
return getattr(groups[make_list(z)], agg)().reset_index(), [
"chart_data = chart_data.groupby(['{cols}'])[['{z}']].{agg}().reset_index()".format(
cols="', '".join([x] + make_list(y)), z=z, agg=agg
)
]
groups = df.groupby(x)
return getattr(groups[y], agg)().reset_index()
return getattr(groups[y], agg)().reset_index(), [
"chart_data = chart_data.groupby('{x}')[['{y}']].{agg}().reset_index()".format(
x=x, y="', '".join(make_list(y)), agg=agg
)
]
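The appended snippet mirrors the aggregation actually performed; for instance (hypothetical column names), a 'sum' of y over x with no z column records:

```python
# hypothetical generated line for x='x', y=['b'], agg='sum'
chart_data = chart_data.groupby('x')[['b']].sum().reset_index()
```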


def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False, **kwargs):
@@ -216,7 +246,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
:return: dict
"""

data = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col)
data, code = retrieve_chart_data(raw_data, x, y, kwargs.get('z'), group_col)
x_col = str('x')
y_cols = make_list(y)
z_col = kwargs.get('z')
@@ -225,11 +255,16 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
z_cols = [z_col]
if group_col is not None and len(group_col):
data = data.sort_values(group_col + [x])
code.append("chart_data = chart_data.sort_values(['{cols}'])".format(cols="', '".join(group_col + [x])))
check_all_nan(data, [x] + y_cols)
data = data.rename(columns={x: x_col})
code.append("chart_data = chart_data.rename(columns={'" + x + "': '" + x_col + "'})")
if agg is not None:
data = data.groupby(group_col + [x_col])
data = getattr(data, agg)().reset_index()
code.append("chart_data = chart_data.groupby(['{cols}']).{agg}().reset_index()".format(
cols="', '".join(group_col + [x]), agg=agg
))
max_groups = 15
if len(data[group_col].drop_duplicates()) > max_groups:
msg = (
@@ -239,6 +274,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
raise Exception(msg)

data = data.dropna()
code.append("chart_data = chart_data.dropna()")
data_f, range_f = build_formatters(data)
ret_data = dict(
data={},
@@ -255,15 +291,19 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
])
ret_data['data'][group_val] = data_f.format_lists(grp)
ret_data['dtypes'] = {c: classify_type(dtype) for c, dtype in dtypes.items()}
return ret_data
return ret_data, code
sort_cols = [x] + (y_cols if len(z_cols) else [])
data = data.sort_values(sort_cols)
code.append("chart_data = chart_data.sort_values(['{cols}'])".format(cols="', '".join(sort_cols)))
check_all_nan(data, [x] + y_cols + z_cols)
y_cols = [str(y_col) for y_col in y_cols]
data.columns = [x_col] + y_cols + z_cols
code.append("chart_data.columns = ['{cols}']".format(cols="', '".join([x_col] + y_cols + z_cols)))
if agg is not None:
data = build_agg_data(data, x_col, y_cols, kwargs, agg, z=z_col)
data, agg_code = build_agg_data(data, x_col, y_cols, kwargs, agg, z=z_col)
code += agg_code
data = data.dropna()
code.append("chart_data = chart_data.dropna()")

dupe_cols = [x_col] + (y_cols if len(z_cols) else [])
check_exceptions(data[dupe_cols].rename(columns={'x': x}), allow_duplicates,
Expand All @@ -274,7 +314,7 @@ def build_chart(raw_data, x, y, group_col=None, agg=None, allow_duplicates=False
min={col: fmt(data[col].min(), None) for _, col, fmt in range_f.fmts if col in [x_col] + y_cols + z_cols},
max={col: fmt(data[col].max(), None) for _, col, fmt in range_f.fmts if col in [x_col] + y_cols + z_cols}
)
return ret_data
return ret_data, code


WEEKDAY_MAP = {idx: day for idx, day in enumerate(['Mon', 'Tues', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'])}
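Taken together, callers of `build_chart` now receive the chart payload and its code trail side by side. A minimal usage sketch, assuming the import path in this diff and hypothetical data:

```python
import pandas as pd

from dtale.charts.utils import build_chart

df = pd.DataFrame(dict(cat=list('aabb'), val=[1, 2, 3, 4]))
data, code = build_chart(df, 'cat', 'val', agg='sum')
print('\n'.join(code))  # the pandas snippet that rebuilds chart_data
```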
41 changes: 25 additions & 16 deletions dtale/cli/loaders/arctic_loader.py
@@ -1,8 +1,8 @@
from builtins import map
from logging import getLogger

import pandas as pd

from dtale.app import show
from dtale.cli.clickutils import get_loader_options

logger = getLogger(__name__)
@@ -22,6 +22,29 @@
]


# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
def show_loader(**kwargs):
return show(data_loader=lambda: loader_func(**kwargs), **kwargs)


def loader_func(**kwargs):
try:
from arctic import Arctic
from arctic.store.versioned_item import VersionedItem
except ImportError:
raise ImportError('In order to use the arctic loader you must install arctic!')
host = Arctic(kwargs.get('host'))
lib = host.get_library(kwargs.get('library'))
read_kwargs = {}
start, end = (kwargs.get(p) for p in ['start', 'end'])
if start and end:
read_kwargs['chunk_range'] = pd.date_range(start, end)
data = lib.read(kwargs.get('node'), **read_kwargs)
if isinstance(data, VersionedItem):
data = data.data
return data


# IMPORTANT!!! This function is required for building any customized CLI loader.
def find_loader(kwargs):
"""
@@ -33,23 +56,9 @@ def find_loader(kwargs):
"""
arctic_opts = get_loader_options(LOADER_KEY, kwargs)
if len([f for f in arctic_opts.values() if f]):
try:
from arctic import Arctic
from arctic.store.versioned_item import VersionedItem
except ImportError:
raise ImportError('In order to use the --arctic loader you must install arctic!')

def _arctic_loader():
host = Arctic(arctic_opts['host'])
lib = host.get_library(arctic_opts['library'])
read_kwargs = {}
start, end = map(arctic_opts.get, ['start', 'end'])
if start and end:
read_kwargs['chunk_range'] = pd.date_range(start, end)
data = lib.read(arctic_opts['node'], **read_kwargs)
if isinstance(data, VersionedItem):
data = data.data
return data
return loader_func(**arctic_opts)

return _arctic_loader
return None
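With the read logic hoisted into `loader_func`, the same arctic read can now be driven from Python through the exposed `dtale.show_arctic` (hypothetical host, library, and symbol; when both `start` and `end` are supplied they become a `chunk_range`):

```python
import dtale

# hypothetical connection details
dtale.show_arctic(host='localhost', library='user.my_lib', node='MY_SYMBOL',
                  start='2020-01-01', end='2020-01-31')
```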
15 changes: 13 additions & 2 deletions dtale/cli/loaders/csv_loader.py
@@ -1,5 +1,6 @@
import pandas as pd

from dtale.app import show
from dtale.cli.clickutils import get_loader_options

'''
@@ -13,6 +14,16 @@
]


# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
def show_loader(**kwargs):
return show(data_loader=lambda: loader_func(**kwargs), **kwargs)


def loader_func(**kwargs):
path = kwargs.pop('path')
return pd.read_csv(path, **{k: v for k, v in kwargs.items() if k in LOADER_PROPS})


# IMPORTANT!!! This function is required for building any customized CLI loader.
def find_loader(kwargs):
"""
@@ -28,7 +39,7 @@ def _csv_loader():
csv_arg_parsers = { # TODO: add additional arg parsers
'parse_dates': lambda v: v.split(',') if v else None
}
kwargs = {k: csv_arg_parsers.get(k, lambda v: v)(v) for k, v in csv_opts.items() if k != 'path'}
return pd.read_csv(csv_opts['path'], **kwargs)
kwargs = {k: csv_arg_parsers.get(k, lambda v: v)(v) for k, v in csv_opts.items()}
return loader_func(**kwargs)
return _csv_loader
return None
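Because `loader_func` filters its kwargs against `LOADER_PROPS`, options intended for `show()` pass through without reaching `read_csv`. A hedged sketch (hypothetical file; assumes `parse_dates` is among the elided `LOADER_PROPS` above):

```python
from dtale.cli.loaders.csv_loader import loader_func

# 'port' is a show() option, not a read_csv option; the LOADER_PROPS filter drops it
df = loader_func(path='data.csv', parse_dates=['date'], port=40000)
```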
26 changes: 24 additions & 2 deletions dtale/cli/loaders/json_loader.py
@@ -1,5 +1,7 @@
import pandas as pd
import requests

from dtale.app import show
from dtale.cli.clickutils import get_loader_options

'''
@@ -13,6 +15,26 @@
]


# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
def show_loader(**kwargs):
return show(data_loader=lambda: loader_func(**kwargs), **kwargs)


def loader_func(**kwargs):
path = kwargs.pop('path')
normalize = kwargs.pop('normalize', False)
if path.startswith('http://') or path.startswith('https://'): # add support for URLs
proxy = kwargs.pop('proxy', None)
req_kwargs = {}
if proxy is not None:
req_kwargs['proxies'] = dict(http=proxy, https=proxy)
resp = requests.get(path, **req_kwargs)
path = resp.json() if normalize else resp.text
if normalize:
return pd.io.json.json_normalize(path, **kwargs)
return pd.read_json(path, **{k: v for k, v in kwargs.items() if k in LOADER_PROPS})


# IMPORTANT!!! This function is required for building any customized CLI loader.
def find_loader(kwargs):
"""
@@ -28,7 +50,7 @@ def _json_loader():
json_arg_parsers = { # TODO: add additional arg parsers
'parse_dates': lambda v: v.split(',') if v else None
}
kwargs = {k: json_arg_parsers.get(k, lambda v: v)(v) for k, v in json_opts.items() if k != 'path'}
return pd.read_json(json_opts['path'], **kwargs)
kwargs = {k: json_arg_parsers.get(k, lambda v: v)(v) for k, v in json_opts.items()}
return loader_func(**kwargs)
return _json_loader
return None
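The JSON loader also gains URL support; a sketch of the exposed entry point against a hypothetical endpoint, where `normalize` routes the parsed payload through `pd.io.json.json_normalize` and `proxy` is applied to both http and https:

```python
import dtale

# hypothetical URL and proxy
dtale.show_json(path='https://example.com/data.json', normalize=True,
                proxy='http://proxy.example.com:8080')
```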