diff --git a/CHANGELOG.md b/CHANGELOG.md index a645eb8..7a8e485 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,8 @@ * Add `label_duplicate` option to avoid printing redundant labels. * Add `other_thresh_XX` option to allow recategorisation of entries with values below a certain threshold. -* Internal code changes using OOP methods to assist further features. +* Add `sort_dict` to override sort order for individual labels. +* Internal code changes using OOP methods to tidy up. ## 2024-03-25 v1.2.1 diff --git a/ausankey/ausankey.py b/ausankey/ausankey.py index 19c59e2..ba43626 100644 --- a/ausankey/ausankey.py +++ b/ausankey/ausankey.py @@ -182,6 +182,10 @@ class Sankey: * `"bottom"`: data is sorted with largest entries on bottom * `"none"`: data is presented in the same order as it (first) appears in the DataFrame + sort_dict : dict + Override the weight sum used to sort nodes by the value specified in the dict. + Typically used to force particular categories to the top or bottom. 
+ titles : list of str Array of title strings for each columns @@ -271,6 +275,7 @@ def __init__( title_loc="inner", # "outer" title_font=None, sort="bottom", # "top", "bottom", "none" + sort_dict=None, valign="bottom", # "top","center" value_format=".0f", value_gap=None, @@ -314,6 +319,7 @@ def __init__( self.title_loc = title_loc self.title_side = title_side self.sort = sort + self.sort_dict = sort_dict or {} self.valign = valign self.value_format = value_format self.value_gap = label_gap if value_gap is None else value_gap @@ -352,7 +358,7 @@ def setup(self, data): # sort and calc self.plot_height_nom = max(self.weight_sum) for ii in range(self.num_stages): - self.node_sizes[ii] = sort_dict(self.node_sizes[ii], self.sort) + self.node_sizes[ii] = self.sort_node_sizes(self.node_sizes[ii], self.sort) col_hgt[ii] = self.weight_sum[ii] + (len(self.nodes_uniq[ii]) - 1) * self.node_gap * self.plot_height_nom # overall dimensions @@ -467,9 +473,9 @@ def subplot(self, ii): self.data[weightind + lastind], ] - nodes_lr = [ - sort_nodes(labels_lr[0], self.node_sizes[ii]), - sort_nodes(labels_lr[1], self.node_sizes[ii + 1]), + nodes_lr =[ + self.sort_nodes(labels_lr[0], self.node_sizes[ii]), + self.sort_nodes(labels_lr[1], self.node_sizes[ii + 1]), ] # Determine sizes of individual subflows @@ -749,52 +755,52 @@ def draw_title(self, x, y, label, va): ########################################### -def sort_nodes(lbl, node_sizes): - """creates a sorted list of labels by their summed weights""" - - arr = {} - for uniq in lbl.unique(): - if uniq is not None: - arr[uniq] = True - - sort_arr = sorted( - arr.items(), - key=lambda item: list(node_sizes).index(item[0]), - # sorting = 0,1,-1 affects this - ) - - return list(dict(sort_arr).keys()) + def sort_nodes(self,lbl, node_sizes): + """creates a sorted list of labels by their summed weights""" + + arr = {} + for uniq in lbl.unique(): + if uniq is not None: + arr[uniq] = True + + sort_arr = sorted( + arr.items(), + key=lambda item: 
list(node_sizes).index(item[0]), + ) + + return list(dict(sort_arr).keys()) ########################################### -def sort_dict(lbl, sorting): - """creates a sorted list of labels by their summed weights""" + def sort_node_sizes(self,lbl, sorting): + """creates a sorted list of labels by their summed weights""" - if sorting == "top": - s = 1 - elif sorting == "bottom": - s = -1 - elif sorting == "center": - s = 1 - else: - s = 0 + if sorting == "top": + s = 1 + elif sorting == "bottom": + s = -1 + elif sorting == "center": + s = 1 + else: + s = 0 - sort_arr = sorted( - lbl.items(), - key=lambda item: s * item[1], - # sorting = 0,1,-1 affects this - ) + sort_arr = sorted( + lbl.items(), + key=lambda item: + s * self.sort_dict.get(item[0], item[1]), + # sorting = 0,1,-1 affects this + ) - sorted_labels = dict(sort_arr) + sorted_labels = dict(sort_arr) - if sorting == "center": - # this kinda works but i dont think it's a good idea because you lose perception of relative sizes - # probably has an off-by-one even/odd error - sorted_labels = sorted_labels[1::2] + sorted_labels[-1::-2] + if sorting == "center": + # this kinda works but i dont think it's a good idea because you lose perception of relative sizes + # probably has an off-by-one even/odd error + sorted_labels = sorted_labels[1::2] + sorted_labels[-1::-2] - return sorted_labels + return sorted_labels ########################################### diff --git a/docs/index.md b/docs/index.md index db87032..86eb408 100644 --- a/docs/index.md +++ b/docs/index.md @@ -126,6 +126,9 @@ sky.sankey(data,sort=“none”) ``` ![Image with options](fruits_sort_none.png) +The sort order of individual labels can be overridden using the `sort_dict` option. +E.g., `sort_dict={"a": 0}` would sort label `a` to the bottom of each stage regardless of its weighted node values. 
+ ## Vertical Alignment The vertical alignment of the diagram can be `”top”`, `”bottom”`, or `”center”`: @@ -134,6 +137,7 @@ sky.sankey(data,valign = “center”) ``` ![Image with options](fruits_valign.png) + ## Labels If the data is generated externally it may not be convenient to edit the label text in the source. The typeset labels can be specified using a dictionary of lookup strings: @@ -187,6 +191,25 @@ sky.sankey( ![Image with options](frame3_labels_dup.png) +## Values + +The numerical values for each (sub)flow are annotated by default. +(It's slightly confusing that labels refer to nodes whereas values +refer to flows.) These can be customised or turned off. + +The locations of the values can be specified according to whether they correspond to the flows in the first, middle, or last of the diagram: + + value_loc = [ , , ] + +Allowable values for `` and `` are `"left"`, `"right"`, `"both"`, or `"none"`. +The default settings are: + + value_loc = [ "both", "right", "right" ] + +These defaults are intended to avoid clashes with the default `label_loc` settings. +Currently there is no way to de-duplicate repeated values. + + ## Titles Titles for each column of data can be added: @@ -305,6 +328,7 @@ sky.sankey( data, frame_gap = 0.2 , label_width = 0.3 , label_gap = 0.02, + value_gap = 0.03, title_gap = 0.1 , ) ```