add sort_dict and more doc

AUMAG · Apr 2, 2024 · d0bd1de · d0bd1de
1 parent 7ae5126
commit d0bd1de
Show file tree

Hide file tree

Showing 3 changed files with 72 additions and 41 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,7 +4,8 @@
 
 * Add `label_duplicate` option to avoid printing redundant labels.
 * Add `other_thresh_XX` option to allow recategorisation of entries with values below a certain threshold.
-* Internal code changes using OOP methods to assist further features.
+* Add `sort_dict` to override sort order for individual labels.
+* Internal code changes using OOP methods to tidy up.
 
 
 ## 2024-03-25 v1.2.1

diff --git a/ausankey/ausankey.py b/ausankey/ausankey.py
@@ -182,6 +182,10 @@ class Sankey:
         * `"bottom"`: data is sorted with largest entries on bottom
         * `"none"`: data is presented in the same order as it (first) appears in the DataFrame
 
+    sort_dict : dict
+        Override the weight sum used to sort nodes by the value specified in the dict.
+        Typically used to force particular categories to the top or bottom.
+
     titles : list of str
         Array of title strings for each columns
 
@@ -271,6 +275,7 @@ def __init__(
         title_loc="inner",  # "outer"
         title_font=None,
         sort="bottom",  # "top", "bottom", "none"
+        sort_dict=None,
         valign="bottom",  # "top","center"
         value_format=".0f",
         value_gap=None,
@@ -314,6 +319,7 @@ def __init__(
         self.title_loc = title_loc
         self.title_side = title_side
         self.sort = sort
+        self.sort_dict = sort_dict or {}
         self.valign = valign
         self.value_format = value_format
         self.value_gap = label_gap if value_gap is None else value_gap
@@ -352,7 +358,7 @@ def setup(self, data):
         # sort and calc
         self.plot_height_nom = max(self.weight_sum)
         for ii in range(self.num_stages):
-            self.node_sizes[ii] = sort_dict(self.node_sizes[ii], self.sort)
+            self.node_sizes[ii] = self.sort_node_sizes(self.node_sizes[ii], self.sort)
             col_hgt[ii] = self.weight_sum[ii] + (len(self.nodes_uniq[ii]) - 1) * self.node_gap * self.plot_height_nom
 
         # overall dimensions
@@ -467,9 +473,9 @@ def subplot(self, ii):
             self.data[weightind + lastind],
         ]
 
-        nodes_lr = [
-            sort_nodes(labels_lr[0], self.node_sizes[ii]),
-            sort_nodes(labels_lr[1], self.node_sizes[ii + 1]),
+        nodes_lr =[
+            self.sort_nodes(labels_lr[0], self.node_sizes[ii]),
+            self.sort_nodes(labels_lr[1], self.node_sizes[ii + 1]),
         ]
 
         # Determine sizes of individual subflows
@@ -749,52 +755,52 @@ def draw_title(self, x, y, label, va):
 ###########################################
 
 
-def sort_nodes(lbl, node_sizes):
-    """creates a sorted list of labels by their summed weights"""
-
-    arr = {}
-    for uniq in lbl.unique():
-        if uniq is not None:
-            arr[uniq] = True
-
-    sort_arr = sorted(
-        arr.items(),
-        key=lambda item: list(node_sizes).index(item[0]),
-        # sorting = 0,1,-1 affects this
-    )
-
-    return list(dict(sort_arr).keys())
+    def sort_nodes(self,lbl, node_sizes):
+        """creates a sorted list of labels by their summed weights"""
+
+        arr = {}
+        for uniq in lbl.unique():
+            if uniq is not None:
+                arr[uniq] = True
+
+        sort_arr = sorted(
+            arr.items(),
+            key=lambda item: list(node_sizes).index(item[0]),
+        )
+
+        return list(dict(sort_arr).keys())
 
 
 ###########################################
 
 
-def sort_dict(lbl, sorting):
-    """creates a sorted list of labels by their summed weights"""
+    def sort_node_sizes(self,lbl, sorting):
+        """creates a sorted list of labels by their summed weights"""
 
-    if sorting == "top":
-        s = 1
-    elif sorting == "bottom":
-        s = -1
-    elif sorting == "center":
-        s = 1
-    else:
-        s = 0
+        if sorting == "top":
+            s = 1
+        elif sorting == "bottom":
+            s = -1
+        elif sorting == "center":
+            s = 1
+        else:
+            s = 0
 
-    sort_arr = sorted(
-        lbl.items(),
-        key=lambda item: s * item[1],
-        # sorting = 0,1,-1 affects this
-    )
+        sort_arr = sorted(
+            lbl.items(),
+            key=lambda item:
+                s * self.sort_dict.get(item[0], item[1]),
+            # sorting = 0,1,-1 affects this
+        )
 
-    sorted_labels = dict(sort_arr)
+        sorted_labels = dict(sort_arr)
 
-    if sorting == "center":
-        # this kinda works but i dont think it's a good idea because you lose perception of relative sizes
-        # probably has an off-by-one even/odd error
-        sorted_labels = sorted_labels[1::2] + sorted_labels[-1::-2]
+        if sorting == "center":
+            # this kinda works but i dont think it's a good idea because you lose perception of relative sizes
+            # probably has an off-by-one even/odd error
+            sorted_labels = sorted_labels[1::2] + sorted_labels[-1::-2]
 
-    return sorted_labels
+        return sorted_labels
 
 
 ###########################################

diff --git a/docs/index.md b/docs/index.md
@@ -126,6 +126,9 @@ sky.sankey(data,sort=“none”)
 ```
 ![Image with options](fruits_sort_none.png)
 
+The sort order of individual labels can be overridden using the `sort_dict` option.
+E.g., `sort_dict={"a": 0}` would sort label `a` to the bottom of each stage regardless of its weighted node values.
+
 ## Vertical Alignment
 
 The vertical alignment of the diagram can be  `”top”`, `”bottom”`, or `”center”`:
@@ -134,6 +137,7 @@ sky.sankey(data,valign = “center”)
 ```
 ![Image with options](fruits_valign.png)
 
+
 ## Labels
 
 If the data is generated externally it may not be convenient to edit the label text in the source. The typeset labels can be specified using a dictionary of lookup strings:
@@ -187,6 +191,25 @@ sky.sankey(
 ![Image with options](frame3_labels_dup.png)
 
 
+## Values
+
+The numerical values for each (sub)flow are annotated by default. 
+(It's slightly confusing that labels refer to nodes whereas values
+refer to flows.) These can be customised or turned off.
+
+The locations of the values can be specified according to whether they correspond to the flows in the left, middle, or right of the diagram:
+
+    value_loc = [ <loc_l> , <loc_m>, <loc_r> ]
+
+Allowable values for `<loc_l>` and `<loc_r>` are `"left"`, `"right"`,  `"both"`, or `"none"`.
+The default settings are:
+
+    value_loc = [ "both", "right", "right" ]
+
+These defaults are intended to avoid clashes with the default `label_loc` settings.
+Currently there is no way to de-duplicate repeated values.
+
+
 ## Titles
 
 Titles for each column of data can be added:
@@ -305,6 +328,7 @@ sky.sankey( data,
             frame_gap   = 0.2 ,
             label_width = 0.3 ,
             label_gap   = 0.02,
+            value_gap   = 0.03,
             title_gap   = 0.1 ,
           )
 ```