From 21b38ab449ab6059b6e4219a79887b59840cf052 Mon Sep 17 00:00:00 2001 From: Nicholas Esterer Date: Wed, 8 Jul 2020 18:04:06 -0400 Subject: [PATCH 1/4] dendrogram works with scipy===1.5.0 default colors The default colorscale for _dendrogram contains color names compatible with the default colors given by scipy===1.5.0. It is still backwards compatible with older scipy versions. --- .../plotly/figure_factory/_dendrogram.py | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/_dendrogram.py b/packages/python/plotly/plotly/figure_factory/_dendrogram.py index 380487e9a8..00c0d9762b 100644 --- a/packages/python/plotly/plotly/figure_factory/_dendrogram.py +++ b/packages/python/plotly/plotly/figure_factory/_dendrogram.py @@ -198,7 +198,7 @@ def get_color_dict(self, colorscale): default_colors = OrderedDict(sorted(d.items(), key=lambda t: t[0])) if colorscale is None: - colorscale = [ + rgb_colorscale = [ "rgb(0,116,217)", # blue "rgb(35,205,205)", # cyan "rgb(61,153,112)", # green @@ -206,13 +206,37 @@ def get_color_dict(self, colorscale): "rgb(133,20,75)", # magenta "rgb(255,65,54)", # red "rgb(255,255,255)", # white - "rgb(255,220,0)", - ] # yellow + "rgb(255,220,0)", # yellow + ] + else: + rgb_colorscale = colorscale for i in range(len(default_colors.keys())): k = list(default_colors.keys())[i] # PY3 won't index keys - if i < len(colorscale): - default_colors[k] = colorscale[i] + if i < len(rgb_colorscale): + default_colors[k] = rgb_colorscale[i] + + # add support for cyclic format colors as introduced in scipy===1.5.0 + # before this, color_list, from which the color_key is obtained was: + # ['g', 'r', 'b', 'c', 'm', 'b', 'b', 'b', 'b'], now it is + # ['C1', 'C2', 'C0', 'C3', 'C4', 'C0', 'C0', 'C0', 'C0'], so to keep the + # colors consistent regardless of the version of scipy, 'C1' is mapped + # to 'rgb(61,153,112)' (what 'g' was mapped to before), 'C2' is mapped + # to 'rgb(255,65,54)', 
etc. + cyclic_color_names = ["C%d" % (n,) for n in range(5)] + if colorscale is None: + cyclic_color_rgb = [ + "rgb(0,116,217)", + "rgb(61,153,112)", + "rgb(255,65,54)", + "rgb(35,205,205)", + "rgb(133,20,75)", + ] + else: + cyclic_color_rgb = colorscale + + for k, c in zip(cyclic_color_names, cyclic_color_rgb): + default_colors[k] = c return default_colors From 90e417d1bd3abfa7aa79fa801551e3499f4fc365 Mon Sep 17 00:00:00 2001 From: Nicholas Esterer Date: Thu, 9 Jul 2020 16:24:27 -0400 Subject: [PATCH 2/4] Map new color sequence to old color sequence This is done for ff.create_dendrogram. It was tried as much as possible to preserve the old color sequence, but this was not possible. Also improved the documentation of the colorscale argument. --- .../plotly/figure_factory/_dendrogram.py | 79 +++++++++++++------ 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/_dendrogram.py b/packages/python/plotly/plotly/figure_factory/_dendrogram.py index 00c0d9762b..cb97eeddb0 100644 --- a/packages/python/plotly/plotly/figure_factory/_dendrogram.py +++ b/packages/python/plotly/plotly/figure_factory/_dendrogram.py @@ -32,7 +32,29 @@ def create_dendrogram( :param (ndarray) X: Matrix of observations as array of arrays :param (str) orientation: 'top', 'right', 'bottom', or 'left' :param (list) labels: List of axis category labels(observation labels) - :param (list) colorscale: Optional colorscale for dendrogram tree + :param (list) colorscale: Optional colorscale for dendrogram tree. To + totally replace the default colorscale, a custom + colorscale must contain 8 colors, corresponding + to when the underlying + scipy.cluster.hierarchy.dendrogram specifies + 'b', 'c', 'g', 'k', 'm', 'r', 'w', 'y', in that + order. 
So if you want 'b', 'c', 'g', 'k', to map + to rgb(255,0,0) and 'm', 'r', 'w', 'y', to map + to rgb(0,255,0), the colorscale should be + ['rgb(255,0,0)','rgb(255,0,0)','rgb(255,0,0)', + 'rgb(255,0,0)','rgb(0,255,0)','rgb(0,255,0)', + 'rgb(0,255,0)','rgb(0,255,0)',] If using + scipy >= 1.5.1, instead of the letters above, the + colors are specfied as 'C0', 'C1', etc. and in + that case the list corresponds to the colors: + 'C0', 'C3' or 'C9', 'C1' or 'C7', 'C6', 'C2', + 'C4', 'C8',, 'C5', 'C7', e.g., if + scipy.cluster.hierarchy.dendrogram uses the color + 'C3' or 'C9' this is mapped to the rgb value in + index 1, and there is not color that maps to index + 7, of the colorscale. If the colorscale has less + than 8 colors, the remaining colors remain the + default. :param (function) distfun: Function to compute the pairwise distance from the observations :param (function) linkagefun: Function to compute the linkage matrix from @@ -160,8 +182,8 @@ def __init__( if len(self.zero_vals) > len(yvals) + 1: # If the length of zero_vals is larger than the length of yvals, # it means that there are wrong vals because of the identicial samples. - # Three and more identicial samples will make the yvals of spliting center into 0 and it will \ - # accidentally take it as leaves. + # Three and more identicial samples will make the yvals of spliting + # center into 0 and it will accidentally take it as leaves. 
l_border = int(min(self.zero_vals)) r_border = int(max(self.zero_vals)) correct_leaves_pos = range( @@ -185,6 +207,9 @@ def get_color_dict(self, colorscale): # These are the color codes returned for dendrograms # We're replacing them with nicer colors + # This list is the colors that can be used by dendrogram, which were + # determined as the combination of the default above_threshold_color and + # the default color palette (see scipy/cluster/hierarchy.py) d = { "r": "red", "g": "green", @@ -193,6 +218,8 @@ def get_color_dict(self, colorscale): "m": "magenta", "y": "yellow", "k": "black", + # TODO: 'w' doesn't seem to be in the default color + # palette in scipy/cluster/hierarchy.py "w": "white", } default_colors = OrderedDict(sorted(d.items(), key=lambda t: t[0])) @@ -217,26 +244,32 @@ def get_color_dict(self, colorscale): default_colors[k] = rgb_colorscale[i] # add support for cyclic format colors as introduced in scipy===1.5.0 - # before this, color_list, from which the color_key is obtained was: - # ['g', 'r', 'b', 'c', 'm', 'b', 'b', 'b', 'b'], now it is - # ['C1', 'C2', 'C0', 'C3', 'C4', 'C0', 'C0', 'C0', 'C0'], so to keep the - # colors consistent regardless of the version of scipy, 'C1' is mapped - # to 'rgb(61,153,112)' (what 'g' was mapped to before), 'C2' is mapped - # to 'rgb(255,65,54)', etc. - cyclic_color_names = ["C%d" % (n,) for n in range(5)] - if colorscale is None: - cyclic_color_rgb = [ - "rgb(0,116,217)", - "rgb(61,153,112)", - "rgb(255,65,54)", - "rgb(35,205,205)", - "rgb(133,20,75)", - ] - else: - cyclic_color_rgb = colorscale - - for k, c in zip(cyclic_color_names, cyclic_color_rgb): - default_colors[k] = c + # before this, the colors were named 'r', 'b', 'y' etc., now they are + # named 'C0', 'C1', etc. To keep the colors consistent regardless of the + # scipy version, we try as much as possible to map the new colors to the + # old colors + # this mapping was found by inpecting scipy/cluster/hierarchy.py (see + # comment above). 
+ new_old_color_map = [ + ("C0", "b"), + ("C1", "g"), + ("C2", "r"), + ("C3", "c"), + ("C4", "m"), + ("C5", "y"), + ("C6", "k"), + ("C7", "g"), + ("C8", "r"), + ("C9", "c"), + ] + for nc, oc in new_old_color_map: + try: + default_colors[nc] = default_colors[oc] + except KeyError: + # it could happen that the old color isn't found (if a custom + # colorscale was specified), in this case we set it to an + # arbitrary default. + default_colors[nc] = "rgb(0,116,217)" return default_colors From 58a6f950d3d7d3897c75fee9f0c8d58da246b0f4 Mon Sep 17 00:00:00 2001 From: Nicholas Esterer Date: Fri, 10 Jul 2020 13:33:46 -0400 Subject: [PATCH 3/4] Updated documentation for colorscale of create_dendrogram --- .../plotly/figure_factory/_dendrogram.py | 34 ++++++------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/_dendrogram.py b/packages/python/plotly/plotly/figure_factory/_dendrogram.py index cb97eeddb0..d3a9edcb65 100644 --- a/packages/python/plotly/plotly/figure_factory/_dendrogram.py +++ b/packages/python/plotly/plotly/figure_factory/_dendrogram.py @@ -25,36 +25,22 @@ def create_dendrogram( color_threshold=None, ): """ - Function that returns a dendrogram Plotly figure object. + Function that returns a dendrogram Plotly figure object. This is a thin + wrapper around scipy.cluster.hierarchy.dendrogram. See also https://dash.plot.ly/dash-bio/clustergram. :param (ndarray) X: Matrix of observations as array of arrays :param (str) orientation: 'top', 'right', 'bottom', or 'left' :param (list) labels: List of axis category labels(observation labels) - :param (list) colorscale: Optional colorscale for dendrogram tree. To - totally replace the default colorscale, a custom - colorscale must contain 8 colors, corresponding - to when the underlying - scipy.cluster.hierarchy.dendrogram specifies - 'b', 'c', 'g', 'k', 'm', 'r', 'w', 'y', in that - order. 
So if you want 'b', 'c', 'g', 'k', to map - to rgb(255,0,0) and 'm', 'r', 'w', 'y', to map - to rgb(0,255,0), the colorscale should be - ['rgb(255,0,0)','rgb(255,0,0)','rgb(255,0,0)', - 'rgb(255,0,0)','rgb(0,255,0)','rgb(0,255,0)', - 'rgb(0,255,0)','rgb(0,255,0)',] If using - scipy >= 1.5.1, instead of the letters above, the - colors are specfied as 'C0', 'C1', etc. and in - that case the list corresponds to the colors: - 'C0', 'C3' or 'C9', 'C1' or 'C7', 'C6', 'C2', - 'C4', 'C8',, 'C5', 'C7', e.g., if - scipy.cluster.hierarchy.dendrogram uses the color - 'C3' or 'C9' this is mapped to the rgb value in - index 1, and there is not color that maps to index - 7, of the colorscale. If the colorscale has less - than 8 colors, the remaining colors remain the - default. + :param (list) colorscale: Optional colorscale for the dendrogram tree. With + scipy<=1.4.1 requires 8 colors to be specified, + the 7th of which is ignored. With scipy>=1.5.0, + requires 10 colors. In this case the 8th color is + ignored and the 2nd, 3rd and 6th are used twice as + often as the others. Given a shorter list, the + missing values are replaced with defaults and with + a longer list the extra values are ignored. 
:param (function) distfun: Function to compute the pairwise distance from the observations :param (function) linkagefun: Function to compute the linkage matrix from From 969aba480e98694fbbf01f176df79407e7b8b3ee Mon Sep 17 00:00:00 2001 From: Nicholas Esterer Date: Mon, 13 Jul 2020 09:41:45 -0400 Subject: [PATCH 4/4] Improved doc string for dendrogram --- .../plotly/plotly/figure_factory/_dendrogram.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/packages/python/plotly/plotly/figure_factory/_dendrogram.py b/packages/python/plotly/plotly/figure_factory/_dendrogram.py index d3a9edcb65..4164d0a05b 100644 --- a/packages/python/plotly/plotly/figure_factory/_dendrogram.py +++ b/packages/python/plotly/plotly/figure_factory/_dendrogram.py @@ -33,14 +33,13 @@ def create_dendrogram( :param (ndarray) X: Matrix of observations as array of arrays :param (str) orientation: 'top', 'right', 'bottom', or 'left' :param (list) labels: List of axis category labels(observation labels) - :param (list) colorscale: Optional colorscale for the dendrogram tree. With - scipy<=1.4.1 requires 8 colors to be specified, - the 7th of which is ignored. With scipy>=1.5.0, - requires 10 colors. In this case the 8th color is - ignored and the 2nd, 3rd and 6th are used twice as - often as the others. Given a shorter list, the - missing values are replaced with defaults and with - a longer list the extra values are ignored. + :param (list) colorscale: Optional colorscale for the dendrogram tree. + Requires 8 colors to be specified, the 7th of + which is ignored. With scipy>=1.5.0, the 2nd, 3rd + and 6th are used twice as often as the others. + Given a shorter list, the missing values are + replaced with defaults and with a longer list the + extra values are ignored. :param (function) distfun: Function to compute the pairwise distance from the observations :param (function) linkagefun: Function to compute the linkage matrix from