1.13.0
* #231: "Lock Zoom" button on 3D Scatter & Surface charts for locking camera on animations
* global & instance-level flag to turn off cell editing
* added the ability to upload CSVs
* upgraded prismjs
* #234: update to line animations so that you can lock axes and highlight last point
* #233: add candlestick charts
* #241: total counts vs. count (non-nan) in describe
* #240: force convert to float
* #239: converting mixed columns
* #237: updated "Pivot" reshaper to always use pivot_table
* #236: "inplace" & "drop_index" parameters for memory optimization and parquet loader
aschonfeld committed Aug 13, 2020
1 parent e5f07db commit 26326bc
Showing 57 changed files with 1,989 additions and 310 deletions.
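Before the file-by-file diff, a minimal sketch of the new instance-level editing flag introduced in this release (the DataFrame is illustrative; the parameter itself is added to `dtale.show` in dtale/app.py below):

import pandas as pd

import dtale

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# New in 1.13.0: render the grid read-only for this instance.
dtale.show(df, allow_cell_edits=False)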
2 changes: 1 addition & 1 deletion README.md
@@ -26,7 +26,7 @@ D-Tale was the product of a SAS to Python conversion. What was originally a per
## In The News
- [Man Institute](https://www.man.com/maninstitute/d-tale) (warning: contains deprecated functionality)
- [Python Bytes](https://pythonbytes.fm/episodes/show/169/jupyter-notebooks-natively-on-your-ipad)
- [PyCon 2020](https://www.youtube.com/watch?v=BNgolmUWBp4&t=33s)
- [FlaskCon 2020](https://www.youtube.com/watch?v=BNgolmUWBp4&t=33s)
- [San Diego Python](https://www.youtube.com/watch?v=fLsGur5YqeE&t=29s)
- [Medium: towards data science](https://towardsdatascience.com/introduction-to-d-tale-5eddd81abe3f)
- [Medium: Exploratory Data Analysis – Using D-Tale](https://medium.com/da-tum/exploratory-data-analysis-1-4-using-d-tale-99a2c267db79)
1 change: 1 addition & 0 deletions dtale/__init__.py
@@ -5,6 +5,7 @@
# flake8: NOQA
from dtale.app import show, get_instance, instances, offline_chart # isort:skip
from dtale.cli.loaders import LOADERS # isort:skip
from dtale.global_state import ALLOW_CELL_EDITS

for loader_name, loader in LOADERS.items():
    if hasattr(loader, "show_loader"):
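The import added above re-exports the global counterpart of the new flag. A minimal sketch of using it, assuming the module-level variable in dtale.global_state is read each time an instance starts:

import dtale.global_state as global_state

# Turn cell editing off for every D-Tale instance launched afterwards.
global_state.ALLOW_CELL_EDITS = False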
17 changes: 17 additions & 0 deletions dtale/app.py
@@ -533,6 +533,9 @@ def show(
    context_vars=None,
    ignore_duplicate=False,
    app_root=None,
    allow_cell_edits=True,
    inplace=False,
    drop_index=False,
    **kwargs
):
    """
@@ -569,6 +572,17 @@
    :param ignore_duplicate: if true, this will not check if this data matches any other data previously loaded to
                             D-Tale
    :type ignore_duplicate: bool, optional
    :param app_root: Optional path to prepend to the routes of D-Tale. This is used when making use of
                     Jupyterhub server proxy
    :type app_root: str, optional
    :param allow_cell_edits: If false, this will not allow users to edit cells directly in their D-Tale grid
    :type allow_cell_edits: bool, optional
    :param inplace: If true, this will call `reset_index(inplace=True)` on the input dataframe as a way to save
                    memory. Otherwise this will create a brand new dataframe, thus doubling memory but leaving the
                    input dataframe unchanged.
    :type inplace: bool, optional
    :param drop_index: If true, this will drop any pre-existing index on the dataframe input.
    :type drop_index: bool, optional

    :Example:
@@ -610,6 +624,9 @@
        name=name,
        context_vars=context_vars,
        ignore_duplicate=ignore_duplicate,
        allow_cell_edits=allow_cell_edits,
        inplace=inplace,
        drop_index=drop_index,
    )
    is_active = not running_with_flask_debug() and is_up(app_url)
    if is_active:
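Following the docstring above, a short sketch of the two memory-oriented parameters (the frame is illustrative):

import pandas as pd

import dtale

df = pd.DataFrame({"a": range(3)}, index=[10, 11, 12])

# inplace=True applies reset_index(inplace=True) to df itself instead of copying it;
# drop_index=True discards the pre-existing index (10, 11, 12) rather than keeping
# it as a column.
dtale.show(df, inplace=True, drop_index=True)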
4 changes: 4 additions & 0 deletions dtale/charts/utils.py
@@ -67,6 +67,10 @@ def valid_chart(chart_type=None, x=None, y=None, z=None, **inputs):
            return True
        return False

    if chart_type == "candlestick":
        cs_props = ["cs_x", "cs_open", "cs_close", "cs_high", "cs_low"]
        return all(inputs.get(p) is not None for p in cs_props)

    if x is None or not len(y or []):
        return False
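To illustrate the check added above, a candlestick chart validates only when all five cs_* inputs are supplied (the column names here are hypothetical):

from dtale.charts.utils import valid_chart

cs_inputs = dict(cs_x="date", cs_open="open", cs_close="close", cs_high="high", cs_low="low")
valid_chart(chart_type="candlestick", **cs_inputs)  # True
valid_chart(chart_type="candlestick", cs_x="date")  # False: missing open/close/high/low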

3 changes: 2 additions & 1 deletion dtale/cli/loaders/__init__.py
@@ -4,7 +4,7 @@

import click

from dtale.cli.loaders import arctic_loader, csv_loader, json_loader
from dtale.cli.loaders import arctic_loader, csv_loader, json_loader, parquet_loader

logger = getLogger(__name__)

@@ -78,6 +78,7 @@ def custom_module_loader():
    arctic_loader.LOADER_KEY: arctic_loader,
    csv_loader.LOADER_KEY: csv_loader,
    json_loader.LOADER_KEY: json_loader,
    parquet_loader.LOADER_KEY: parquet_loader,
}


52 changes: 52 additions & 0 deletions dtale/cli/loaders/parquet_loader.py
@@ -0,0 +1,52 @@
import pandas as pd
import requests
from six import BytesIO

from dtale.app import show
from dtale.cli.clickutils import get_loader_options, loader_prop_keys

"""
IMPORTANT!!! These global variables are required for building any customized CLI loader.
When build_loaders runs startup it will search for any modules containing the global variable LOADER_KEY.
"""
LOADER_KEY = "parquet"
LOADER_PROPS = [
    dict(name="path", help="path to parquet file or URL to parquet endpoint"),
    dict(name="engine", help="parquet library to use"),
]


# IMPORTANT!!! This function is required if you would like to be able to use this loader from the back-end.
def show_loader(**kwargs):
    return show(data_loader=lambda: loader_func(**kwargs), **kwargs)


def loader_func(**kwargs):
    path = kwargs.pop("path")
    return pd.read_parquet(
        path, **{k: v for k, v in kwargs.items() if k in loader_prop_keys(LOADER_PROPS)}
    )


# IMPORTANT!!! This function is required for building any customized CLI loader.
def find_loader(kwargs):
    """
    Parquet implementation of data loader which will return a function if any of the
    `click` options based on LOADER_KEY & LOADER_PROPS have been used, otherwise return None

    :param kwargs: Optional keyword arguments to be passed from `click`
    :return: data loader function for Parquet implementation
    """
    parquet_opts = get_loader_options(LOADER_KEY, kwargs)
    if len([f for f in parquet_opts.values() if f]):

        def _parquet_loader():
            parquet_arg_parsers = {}  # TODO: add additional arg parsers
            kwargs = {
                k: parquet_arg_parsers.get(k, lambda v: v)(v)
                for k, v in parquet_opts.items()
            }
            return loader_func(**kwargs)

        return _parquet_loader
    return None
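A sketch of exercising the new loader from Python; the path and engine values are illustrative. On the command line these properties presumably surface as --parquet-path and --parquet-engine, following the --{LOADER_KEY}-{prop} convention the other loaders use.

from dtale.cli.loaders.parquet_loader import loader_func, show_loader

df = loader_func(path="data.parquet", engine="pyarrow")  # plain DataFrame load
show_loader(path="data.parquet")  # load the file and launch a D-Tale instance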
7 changes: 7 additions & 0 deletions dtale/cli/script.py
@@ -24,6 +24,11 @@
help="flag to automatically open default web browser on startup",
)
@click.option("--name", type=str, help="name to apply to your D-Tale session")
@click.option(
    "--no-cell-edits",
    is_flag=True,
    help="flag to turn off the ability to edit cells directly in the grid",
)
@setup_loader_options()
@click.option("--log", "logfile", help="Log file name")
@click.option(
@@ -41,6 +46,7 @@ def main(
    no_reaper=False,
    open_browser=False,
    name=None,
    no_cell_edits=False,
    **kwargs
):
    """
@@ -65,6 +71,7 @@ def main(
        reaper_on=not no_reaper,
        open_browser=open_browser,
        name=name,
        allow_cell_edits=not no_cell_edits,
        **kwargs
    )
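As the call above shows, the CLI flag simply negates into allow_cell_edits, so the following are equivalent (the CSV path is illustrative):

# CLI: dtale --csv-path data.csv --no-cell-edits
# ...is the same as:
import pandas as pd

import dtale

dtale.show(pd.read_csv("data.csv"), allow_cell_edits=False)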

46 changes: 46 additions & 0 deletions dtale/column_builders.py
@@ -5,6 +5,7 @@
import numpy as np
import pandas as pd
from scipy.stats import mstats
from six import string_types

import dtale.global_state as global_state
from dtale.utils import classify_type
@@ -351,7 +352,32 @@ def build_column(self, data):
                return pd.Series(
                    s.astype("float").astype("int"), name=self.name, index=s.index
                )
            elif to_type == "float":
                return pd.Series(
                    pd.to_numeric(s, errors="coerce"), name=self.name, index=s.index
                )
            else:
                if from_type.startswith("mixed"):
                    if to_type == "float":
                        return pd.Series(
                            pd.to_numeric(s, errors="coerce"),
                            name=self.name,
                            index=s.index,
                        )
                    elif to_type == "bool":

                        def _process_mixed_bool(v):
                            if isinstance(v, bool):
                                return v
                            if isinstance(v, string_types):
                                return dict(true=True, false=False).get(
                                    v.lower(), np.nan
                                )
                            return np.nan

                        return pd.Series(
                            s.apply(_process_mixed_bool), name=self.name, index=s.index
                        )
                return pd.Series(s.astype(to_type), name=self.name, index=s.index)
        elif classifier == "I":  # date, float, category, str, bool
            if to_type == "date":
@@ -405,7 +431,27 @@ def build_inner_code(self):
return "pd.Series({s}.astype('float').astype('int'), name='{name}', index={s}.index)".format(
s=s, name=self.name
)
elif to_type == "float":
return "pd.Series(pd.to_numeric({s}, errors='coerce'), name='{name}', index={s}.index)".format(
s=s, name=self.name
)
else:
if from_type.startswith("mixed"):
if to_type == "float":
return "pd.Series(pd.to_numeric({s}, errors='coerce'), name='{name}', index={s}.index)".format(
s=s, name=self.name
)
elif to_type == "bool":
return (
"def _process_mixed_bool(v):\n"
"from six import string_types\n\n"
"\tif isinstance(v, bool):\n"
"\t\treturn v\n"
"\tif isinstance(v, string_types):\n"
"\t\treturn dict(true=True, false=False).get(v.lower(), np.nan)\n"
"\treturn np.nan\n\n"
"pd.Series({s}.apply(_process_mixed_bool), name='{name}', index={s}.index)"
).format(s=s, name=self.name)
return "pd.Series({s}.astype({to_type}), name='{name}', index={s}.index)".format(
s=s, to_type=to_type, name=self.name
)
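A standalone sketch of the two conversions added above (#240's force-convert to float and #239's mixed-to-bool mapping), runnable outside of D-Tale:

import numpy as np
import pandas as pd
from six import string_types

# Force-convert to float: unparseable values become NaN.
pd.to_numeric(pd.Series(["1.5", "x", 2]), errors="coerce")  # 1.5, NaN, 2.0

# Mixed-to-bool: real bools pass through, "true"/"false" strings (any case)
# are mapped, and everything else becomes NaN.
def _process_mixed_bool(v):
    if isinstance(v, bool):
        return v
    if isinstance(v, string_types):
        return dict(true=True, false=False).get(v.lower(), np.nan)
    return np.nan

pd.Series([True, "False", "TRUE", 1, None]).apply(_process_mixed_bool)
# True, False, True, NaN, NaN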