Skip to content

Commit

Permalink
add sort_dict and more doc
Browse files Browse the repository at this point in the history
  • Loading branch information
wspr committed Apr 2, 2024
1 parent 7ae5126 commit d0bd1de
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 41 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

* Add `label_duplicate` option to avoid printing redundant labels.
* Add `other_thresh_XX` option to allow recategorisation of entries with values below a certain threshold.
* Internal code changes using OOP methods to assist further features.
* Add `sort_dict` to override sort order for individual labels.
* Internal code changes using OOP methods to tidy up.


## 2024-03-25 v1.2.1
Expand Down
86 changes: 46 additions & 40 deletions ausankey/ausankey.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,10 @@ class Sankey:
* `"bottom"`: data is sorted with largest entries on bottom
* `"none"`: data is presented in the same order as it (first) appears in the DataFrame
sort_dict : dict
Override the weight sum used to sort nodes by the value specified in the dict.
Typically used to force particular categories to the top or bottom.
titles : list of str
Array of title strings for each columns
Expand Down Expand Up @@ -271,6 +275,7 @@ def __init__(
title_loc="inner", # "outer"
title_font=None,
sort="bottom", # "top", "bottom", "none"
sort_dict=None,
valign="bottom", # "top","center"
value_format=".0f",
value_gap=None,
Expand Down Expand Up @@ -314,6 +319,7 @@ def __init__(
self.title_loc = title_loc
self.title_side = title_side
self.sort = sort
self.sort_dict = sort_dict or {}
self.valign = valign
self.value_format = value_format
self.value_gap = label_gap if value_gap is None else value_gap
Expand Down Expand Up @@ -352,7 +358,7 @@ def setup(self, data):
# sort and calc
self.plot_height_nom = max(self.weight_sum)
for ii in range(self.num_stages):
self.node_sizes[ii] = sort_dict(self.node_sizes[ii], self.sort)
self.node_sizes[ii] = self.sort_node_sizes(self.node_sizes[ii], self.sort)
col_hgt[ii] = self.weight_sum[ii] + (len(self.nodes_uniq[ii]) - 1) * self.node_gap * self.plot_height_nom

# overall dimensions
Expand Down Expand Up @@ -467,9 +473,9 @@ def subplot(self, ii):
self.data[weightind + lastind],
]

nodes_lr = [
sort_nodes(labels_lr[0], self.node_sizes[ii]),
sort_nodes(labels_lr[1], self.node_sizes[ii + 1]),
nodes_lr =[
self.sort_nodes(labels_lr[0], self.node_sizes[ii]),
self.sort_nodes(labels_lr[1], self.node_sizes[ii + 1]),
]

# Determine sizes of individual subflows
Expand Down Expand Up @@ -749,52 +755,52 @@ def draw_title(self, x, y, label, va):
###########################################


def sort_nodes(lbl, node_sizes):
    """Return the unique labels of ``lbl`` in the order given by ``node_sizes``.

    Parameters
    ----------
    lbl
        Object exposing ``unique()`` (presumably a pandas Series of node
        labels -- confirm against the caller). ``None`` entries are skipped.
    node_sizes
        Iterable of labels (a dict keyed by label in the visible callers)
        whose iteration order defines the ranking.

    Returns
    -------
    list
        The distinct non-``None`` labels, ordered by their position
        in ``node_sizes``.

    Raises
    ------
    ValueError
        If a label from ``lbl`` does not occur in ``node_sizes``.
    """
    # Build the reference ordering once rather than once per label compared.
    order = list(node_sizes)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    present = dict.fromkeys(name for name in lbl.unique() if name is not None)

    return sorted(present, key=order.index)
def sort_nodes(self, lbl, node_sizes):
    """Return the unique labels of ``lbl`` in the order given by ``node_sizes``.

    Parameters
    ----------
    lbl
        Object exposing ``unique()`` (presumably a pandas Series of node
        labels -- confirm against the caller). ``None`` entries are skipped.
    node_sizes
        Iterable of labels (a dict keyed by label in the visible callers)
        whose iteration order defines the ranking.

    Returns
    -------
    list
        The distinct non-``None`` labels, ordered by their position
        in ``node_sizes``.

    Raises
    ------
    ValueError
        If a label from ``lbl`` does not occur in ``node_sizes``.
    """
    # Build the reference ordering once rather than once per label compared.
    order = list(node_sizes)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    present = dict.fromkeys(name for name in lbl.unique() if name is not None)

    return sorted(present, key=order.index)


###########################################


def _sort_weighted(lbl, sorting, overrides=None):
    """Order the ``label -> weight`` mapping ``lbl`` according to ``sorting``.

    ``overrides`` optionally maps a label to the value used *for ordering
    only*; the returned dict always carries the original weights.
    """
    overrides = overrides or {}

    # Sign applied to the sort key: +1 ascending, -1 descending, 0 leaves the
    # incoming order untouched (sorted() is stable when every key is equal).
    if sorting == "top":
        s = 1
    elif sorting == "bottom":
        s = -1
    elif sorting == "center":
        s = 1
    else:
        s = 0

    ordered = sorted(
        lbl.items(),
        key=lambda item: s * overrides.get(item[0], item[1]),
    )

    if sorting == "center":
        # Experimental: interleave so the largest entries end up in the middle.
        # Done on the list of items (a dict cannot be sliced), taking the odd
        # positions followed by the even positions reversed so that every
        # entry appears exactly once for both even and odd counts.
        ordered = ordered[1::2] + ordered[0::2][::-1]

    return dict(ordered)


def sort_dict(lbl, sorting):
    """Return a copy of ``lbl`` (``label -> weight``) reordered by weight.

    Parameters
    ----------
    lbl : dict
        Mapping of label to summed weight.
    sorting : str
        ``"top"`` orders by ascending weight, ``"bottom"`` by descending
        weight, ``"center"`` places the largest weights in the middle, and
        any other value keeps the incoming order.

    Returns
    -------
    dict
        A new dict with the same items in the requested order.
    """
    return _sort_weighted(lbl, sorting)


def sort_node_sizes(self, lbl, sorting):
    """Return a copy of ``lbl`` (``label -> weight``) reordered by weight.

    Identical to ordering by weight, except that any label present in
    ``self.sort_dict`` is ranked by the value given there instead of its
    own weight.

    Parameters
    ----------
    lbl : dict
        Mapping of label to summed weight.
    sorting : str
        ``"top"`` orders by ascending weight, ``"bottom"`` by descending
        weight, ``"center"`` places the largest weights in the middle, and
        any other value keeps the incoming order.

    Returns
    -------
    dict
        A new dict with the same items (original weights) in the
        requested order.
    """
    return _sort_weighted(lbl, sorting, self.sort_dict)


###########################################
Expand Down
24 changes: 24 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,9 @@ sky.sankey(data,sort=“none”)
```
![Image with options](fruits_sort_none.png)

The sort order of individual labels can be overridden using the `sort_dict` option.
E.g., `sort_dict={"a": 0}` would sort label `a` to the bottom of each stage regardless of its weighted node values.

## Vertical Alignment

The vertical alignment of the diagram can be `"top"`, `"bottom"`, or `"center"`:
Expand All @@ -134,6 +137,7 @@ sky.sankey(data,valign = “center”)
```
![Image with options](fruits_valign.png)


## Labels

If the data is generated externally it may not be convenient to edit the label text in the source. The typeset labels can be specified using a dictionary of lookup strings:
Expand Down Expand Up @@ -187,6 +191,25 @@ sky.sankey(
![Image with options](frame3_labels_dup.png)


## Values

The numerical values for each (sub)flow are annotated by default.
(It's slightly confusing that labels refer to nodes whereas values
refer to flows.) These can be customised or turned off.

The locations of the values can be specified according to whether they correspond to the flows in the left, middle, or right of the diagram:

value_loc = [ <loc_l> , <loc_m>, <loc_r> ]

Allowable values for `<loc_l>` and `<loc_r>` are `"left"`, `"right"`, `"both"`, or `"none"`.
The default settings are:

value_loc = [ "both", "right", "right" ]

These defaults are intended to avoid clashes with the default `label_loc` settings.
Currently there is no way to de-duplicate repeated values.


## Titles

Titles for each column of data can be added:
Expand Down Expand Up @@ -305,6 +328,7 @@ sky.sankey( data,
frame_gap = 0.2 ,
label_width = 0.3 ,
label_gap = 0.02,
value_gap = 0.03,
title_gap = 0.1 ,
)
```
Expand Down

0 comments on commit d0bd1de

Please sign in to comment.