Skip to content

Commit

Permalink
1.7.5
Browse files Browse the repository at this point in the history
 - #64, handling for loading duplicate data to dtale.show
 - updated dtale.instances() to print urls rather than show all instances
 - removal of Dash "Export to png" function
 - passing data grid queries to chart page as default
 - added sys.exit() to the thread that manages the reaper
  • Loading branch information
Andrew Schonfeld committed Feb 21, 2020
1 parent 97634bc commit c5e1c8b
Show file tree
Hide file tree
Showing 11 changed files with 163 additions and 82 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ D-Tale is the combination of a Flask back-end and a React front-end to bring you

D-Tale was the product of a SAS to Python conversion. What was originally a Perl script wrapper on top of SAS's `insight` function is now a lightweight web client on top of Pandas data structures.

## In The News
- [Man Institute](https://www.man.com/maninstitute/d-tale)
- [Python Bytes](https://pythonbytes.fm/episodes/show/169/jupyter-notebooks-natively-on-your-ipad)

## Contents

- [Getting Started](#getting-started)
Expand Down Expand Up @@ -102,6 +106,15 @@ dtale.instances() # returns a dictionary of all instances available, this would

```

#### Duplicate data check
To help guard against users loading the same data to D-Tale multiple times and thus eating up precious memory, we have a loose check for duplicate input data. The check runs the following:
* Are row & column count the same as a previously loaded piece of data?
* Are the names and order of columns the same as a previously loaded piece of data?

If both of these conditions are true, you will be presented with an error and a link to the previously loaded data. Here is an example of how the interaction looks:
![](https://raw.githubusercontent.com/aschonfeld/dtale-media/master/images/Duplicate_data.png)


### Jupyter Notebook
Within any Jupyter (IPython) notebook, executing a cell like this will display a small instance of D-Tale in the output cell. Here are some examples:

Expand Down
109 changes: 62 additions & 47 deletions dtale/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

from dtale import dtale
from dtale.cli.clickutils import retrieve_meta_info_and_version, setup_logging
from dtale.utils import (build_shutdown_url, build_url, dict_merge, get_host,
running_with_flask_debug)
from dtale.utils import (DuplicateDataError, build_shutdown_url, build_url,
dict_merge, get_host, running_with_flask_debug)
from dtale.views import (DATA, DtaleData, cleanup, head_data_id, is_up, kill,
startup)

Expand Down Expand Up @@ -93,6 +93,7 @@ def __init__(self, import_name, reaper_on=True, url=None, *args, **kwargs):
"""
self.reaper_on = reaper_on
self.reaper = None
self.base_url = url
self.shutdown_url = build_shutdown_url(url)
self.port = None
super(DtaleFlask, self).__init__(import_name, *args, **kwargs)
Expand Down Expand Up @@ -146,7 +147,9 @@ def build_reaper(self, timeout=REAPER_TIMEOUT):

def _func():
logger.info('Executing shutdown due to inactivity...')
requests.get(self.shutdown_url)
if is_up(self.base_url): # make sure the Flask process is still running
requests.get(self.shutdown_url)
sys.exit() # kill off the reaper thread

self.reaper = Timer(timeout, _func)
self.reaper.start()
Expand Down Expand Up @@ -394,7 +397,8 @@ def is_port_in_use(port):


def show(data=None, host=None, port=None, name=None, debug=False, subprocess=True, data_loader=None,
reaper_on=True, open_browser=False, notebook=False, force=False, context_vars=None, **kwargs):
reaper_on=True, open_browser=False, notebook=False, force=False, context_vars=None, ignore_duplicate=False,
**kwargs):
"""
Entry point for kicking off D-Tale :class:`flask:flask.Flask` process from python process
Expand Down Expand Up @@ -426,6 +430,9 @@ def show(data=None, host=None, port=None, name=None, debug=False, subprocess=Tru
:param context_vars: a dictionary of the variables that will be available for use in user-defined expressions,
such as filters
:type context_vars: dict, optional
:param ignore_duplicate: if true, this will not check if this data matches any other data previously loaded to
D-Tale
:type ignore_duplicate: bool, optional
:Example:
Expand All @@ -438,59 +445,67 @@ def show(data=None, host=None, port=None, name=None, debug=False, subprocess=Tru
..link displayed in logging can be copied and pasted into any browser
"""

logfile, log_level, verbose = map(kwargs.get, ['logfile', 'log_level', 'verbose'])
setup_logging(logfile, log_level or 'info', verbose)

initialize_process_props(host, port, force)
url = build_url(ACTIVE_PORT, ACTIVE_HOST)
instance = startup(url, data=data, data_loader=data_loader, name=name, context_vars=context_vars)
is_active = not running_with_flask_debug() and is_up(url)
if is_active:
def _start():
if open_browser:
instance.open_browser()
else:
def _start():
app = build_app(url, reaper_on=reaper_on, host=ACTIVE_HOST)
if debug:
app.jinja_env.auto_reload = True
app.config['TEMPLATES_AUTO_RELOAD'] = True
else:
getLogger("werkzeug").setLevel(LOG_ERROR)

if open_browser:
instance.open_browser()

# hide banner message in production environments
cli = sys.modules.get('flask.cli')
if cli is not None:
cli.show_server_banner = lambda *x: None
try:
logfile, log_level, verbose = map(kwargs.get, ['logfile', 'log_level', 'verbose'])
setup_logging(logfile, log_level or 'info', verbose)

app.run(host='0.0.0.0', port=ACTIVE_PORT, debug=debug, threaded=True)

if subprocess:
initialize_process_props(host, port, force)
url = build_url(ACTIVE_PORT, ACTIVE_HOST)
instance = startup(url, data=data, data_loader=data_loader, name=name, context_vars=context_vars,
ignore_duplicate=ignore_duplicate)
is_active = not running_with_flask_debug() and is_up(url)
if is_active:
_start()
def _start():
if open_browser:
instance.open_browser()
else:
_thread.start_new_thread(_start, ())
def _start():
app = build_app(url, reaper_on=reaper_on, host=ACTIVE_HOST)
if debug:
app.jinja_env.auto_reload = True
app.config['TEMPLATES_AUTO_RELOAD'] = True
else:
getLogger("werkzeug").setLevel(LOG_ERROR)

if open_browser:
instance.open_browser()

# hide banner message in production environments
cli = sys.modules.get('flask.cli')
if cli is not None:
cli.show_server_banner = lambda *x: None

app.run(host='0.0.0.0', port=ACTIVE_PORT, debug=debug, threaded=True)

if subprocess:
if is_active:
_start()
else:
_thread.start_new_thread(_start, ())

if notebook:
instance.notebook()
else:
logger.info('D-Tale started at: {}'.format(url))
_start()
if notebook:
instance.notebook()
else:
logger.info('D-Tale started at: {}'.format(url))
_start()

return instance
return instance
except DuplicateDataError as ex:
print(
'It looks like this data may have already been loaded to D-Tale based on shape and column names. Here is '
'URL of the data that seems to match it:\n\n{}\n\nIf you still want to load this data please use the '
'following command:\n\ndtale.show(df, ignore_duplicate=True)'.format(
DtaleData(ex.data_id, build_url(ACTIVE_PORT, ACTIVE_HOST)).main_url()
)
)
return None


def instances():
"""
Returns a dictionary of data IDs & :class:`dtale.views.DtaleData` objects pertaining to all the current pieces of
data being viewed
:return: dict
Prints all urls to the current pieces of data being viewed
"""
return {data_id: DtaleData(data_id, build_url(ACTIVE_PORT, ACTIVE_HOST)) for data_id in DATA}
print('\n'.join([DtaleData(data_id, build_url(ACTIVE_PORT, ACTIVE_HOST)).main_url() for data_id in DATA]))


def get_instance(data_id):
Expand Down
2 changes: 1 addition & 1 deletion dtale/dash_application/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dtale.utils import (classify_type, dict_merge, divide_chunks,
flatten_lists, get_dtypes, make_list,
make_timeout_request, run_query)
from dtale.views import DATA, CONTEXT_VARIABLES
from dtale.views import CONTEXT_VARIABLES, DATA
from dtale.views import build_chart as build_chart_data


Expand Down
7 changes: 5 additions & 2 deletions dtale/dash_application/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,13 +283,16 @@ def show_yaxis_ranges(**inputs):
return chart_type in YAXIS_CHARTS and len(y or [])


def charts_layout(df, **inputs):
def charts_layout(df, settings, **inputs):
"""
Builds main dash inputs with dropdown options populated with the columns of the dataframe associated with the
page. Inputs included are: chart tabs, query, x, y, z, group, aggregation, rolling window/computation,
chart per group toggle, bar sort, bar mode, y-axis range editors
:param df: dataframe to drive the charts built on page
:type df: :class:`pandas:pandas.DataFrame`
:param settings: global settings associated with this dataframe (contains properties like "query")
:type settings: dict
:return: dash markup
"""
[chart_type, x, y, z, group, agg] = [inputs.get(p) for p in ['chart_type', 'x', 'y', 'z', 'group', 'agg']]
Expand Down Expand Up @@ -327,7 +330,7 @@ def charts_layout(df, **inputs):
html.Div([
query_label, dcc.Input(
id='query-input', type='text', placeholder=query_placeholder, className='form-control',
value=inputs.get('query'), style={'line-height': 'inherit'})
value=inputs.get('query') or settings.get('query'), style={'line-height': 'inherit'})
], className='input-group mr-3')],
className='col'
), className='row pt-3 pb-3 charts-filters'),
Expand Down
8 changes: 5 additions & 3 deletions dtale/dash_application/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
show_input_handler,
show_yaxis_ranges)
from dtale.utils import dict_merge, make_list, run_query
from dtale.views import DATA, CONTEXT_VARIABLES
from dtale.views import CONTEXT_VARIABLES, DATA, SETTINGS

logger = getLogger(__name__)

Expand Down Expand Up @@ -325,5 +325,7 @@ def display_page(_ts, pathname, search):
"""
dash_app.config.suppress_callback_exceptions = False
params = chart_url_params(search)
df = DATA[get_data_id(pathname)]
return charts_layout(df, **params)
data_id = get_data_id(pathname)
df = DATA[data_id]
settings = SETTINGS.get(data_id) or {}
return charts_layout(df, settings, **params)
6 changes: 4 additions & 2 deletions dtale/static/css/dash.css
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,9 @@ div.tab-container > div.tab:last-child {
border: 1px solid #a7b3b7;
border-radius: 0 .25rem .25rem 0;
}
div.modebar > div.modebar-group:last-child {
display: none; /* hide plotly logo */
div.modebar > div.modebar-group:last-child, /* hide plotly logo */
div.modebar > div.modebar-group:first-child /* hide plotly "export to png" */
{
display: none;
}

8 changes: 7 additions & 1 deletion dtale/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def run_query(df, query, context_vars):
:param df: input dataframe
:type df: :class:`pandas:pandas.DataFrame`
:param query: query string
:type query: string
:type query: str
:param context_vars: dictionary of user-defined variables which can be referenced by name in query strings
:type context_vars: dict
:return: filtered dataframe
Expand Down Expand Up @@ -804,3 +804,9 @@ def run_query(df, query, context_vars):
if not len(df):
raise Exception('query "{}" found no data, please alter'.format(query))
return df


class DuplicateDataError(Exception):
    """
    Error signalling that input data looks like data already loaded to D-Tale.

    The exception message is always "Duplicate Data"; the identifier of the
    matching, previously loaded data is exposed through the ``data_id``
    attribute so callers can point the user at it.

    :param data_id: identifier of the previously loaded data that was matched
    """

    def __init__(self, data_id):
        # keep the matched identifier available to whoever catches this error
        self.data_id = data_id
        super(DuplicateDataError, self).__init__("Duplicate Data")
Loading

0 comments on commit c5e1c8b

Please sign in to comment.