From dccbdf887cc7c1d5e5f89369457ee4b974a95299 Mon Sep 17 00:00:00 2001 From: Rob L Date: Wed, 13 Mar 2024 23:06:10 -0400 Subject: [PATCH 1/6] proposed addition of a df.agg stacked go.bar example --- doc/python/bar-charts.md | 51 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index 9e9b30755b9..6c0b66b6fd3 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -304,6 +304,57 @@ fig.update_layout(barmode='stack') fig.show() ``` +### Stacked Bar Chart from aggregating a data frame + +Stacked bar charts are a powerful way to present results summarizing categories generated using the Pandas aggregate commands, which produce a wide format data set with one row for each bar component and a column for each bar, which is the transpose of the orientation of the px.bar wide data frame. Tranposing and updating the indexes is a somewhat involved option. Here is one straightforward way to aggregate a data set into a summarized form and present the results as a stacked bar. + +``` + +from plotly import graph_objects as go +import pandas as pd + +#get one year of gapminder data +url = 'https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv' +df = pd.read_csv(url) +df = df[df['year']==2007] +df["gdp"]=df["pop"]*df['gdpPercap'] + + +#build the summary of interest +df_summarized = df.groupby("continent", observed=True).agg("sum").reset_index() + +df_summarized["percent of world population"]=100*df_summarized["pop"]/df_summarized["pop"].sum() +df_summarized["percent of world GDP"]=100*df_summarized["gdp"]/df_summarized["gdp"].sum() + + +df2 = df_summarized[["continent", +"percent of world population", +"percent of world GDP", +]] + +#we now have a wide data frame, but it's in the opposite orientation from the one that px is designed to deal with. 
+#transposing it and rebuilding the indexes is an option, but iterating through the DF using graph objects is more succinct. + +fig=go.Figure() +for category in df_summarized["continent"].values: + fig.add_trace(go.Bar( + x=df2.columns[1:], + #we need to get a pandas series that contains just the values to graph; + #we do so by selecting the right row, selecting the right columns + #and then tranposing and using iloc to convert to a series + #here, I assume that the bar element category variable is in column 0 + y=list(df2.loc[df2["continent"]==category][list(df2.columns[1:])].transpose().iloc[:,0]), + name=str(category) + + + ) +) +fig.update_layout(barmode="stack") + +fig.show() +``` + + ### Bar Chart with Hover Text ```python From b8a198da1be086df62ea9988d512f0ed1f220850 Mon Sep 17 00:00:00 2001 From: Rob L Date: Wed, 13 Mar 2024 23:17:49 -0400 Subject: [PATCH 2/6] Update bar-charts.md renamed df2 to df --- doc/python/bar-charts.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index 6c0b66b6fd3..a2ef93572d4 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -327,7 +327,7 @@ df_summarized["percent of world population"]=100*df_summarized["pop"]/df_summari df_summarized["percent of world GDP"]=100*df_summarized["gdp"]/df_summarized["gdp"].sum() -df2 = df_summarized[["continent", +df = df_summarized[["continent", "percent of world population", "percent of world GDP", ]] @@ -338,12 +338,12 @@ df2 = df_summarized[["continent", fig=go.Figure() for category in df_summarized["continent"].values: fig.add_trace(go.Bar( - x=df2.columns[1:], + x=df.columns[1:], #we need to get a pandas series that contains just the values to graph; #we do so by selecting the right row, selecting the right columns #and then tranposing and using iloc to convert to a series #here, I assume that the bar element category variable is in column 0 - 
y=list(df2.loc[df2["continent"]==category][list(df2.columns[1:])].transpose().iloc[:,0]), + y=list(df.loc[df["continent"]==category][list(df.columns[1:])].transpose().iloc[:,0]), name=str(category) From 32a2c0cc2950eab38b78b606b08849bedc6adf62 Mon Sep 17 00:00:00 2001 From: Rob L Date: Wed, 13 Mar 2024 23:19:25 -0400 Subject: [PATCH 3/6] Update bar-charts.md --- doc/python/bar-charts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index a2ef93572d4..90672e7117d 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -306,7 +306,7 @@ fig.show() ### Stacked Bar Chart from aggregating a data frame -Stacked bar charts are a powerful way to present results summarizing categories generated using the Pandas aggregate commands, which produce a wide format data set with one row for each bar component and a column for each bar, which is the transpose of the orientation of the px.bar wide data frame. Tranposing and updating the indexes is a somewhat involved option. Here is one straightforward way to aggregate a data set into a summarized form and present the results as a stacked bar. +Stacked bar charts are a powerful way to present results summarizing categories generated using the Pandas aggregate commands. DF.agg() which produces a wide format data set with one row for each bar component and a column for each bar, which is the transpose of the orientation of the px.bar wide data frame. Tranposing and updating the indexes is a somewhat involved option. Here is one straightforward way to aggregate a data set into a summarized form and present the results as a stacked bar. 
``` From 5e8942781ad757d33f0c5204ab169f93edc25f50 Mon Sep 17 00:00:00 2001 From: Mojtaba Samimi <33888540+archmoj@users.noreply.github.com> Date: Fri, 22 Mar 2024 10:32:03 -0400 Subject: [PATCH 4/6] Update doc/python/bar-charts.md Co-authored-by: Liam Connors --- doc/python/bar-charts.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index 90672e7117d..af12665a018 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -304,7 +304,7 @@ fig.update_layout(barmode='stack') fig.show() ``` -### Stacked Bar Chart from aggregating a data frame +### Stacked Bar Chart From Aggregating a DataFrame Stacked bar charts are a powerful way to present results summarizing categories generated using the Pandas aggregate commands. DF.agg() which produces a wide format data set with one row for each bar component and a column for each bar, which is the transpose of the orientation of the px.bar wide data frame. Tranposing and updating the indexes is a somewhat involved option. Here is one straightforward way to aggregate a data set into a summarized form and present the results as a stacked bar. 
From eca3711d6dc0fe3bd5b4712137a9623c8e9d9637 Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Wed, 10 Apr 2024 09:33:53 -0400 Subject: [PATCH 5/6] Update doc/python/bar-charts.md --- doc/python/bar-charts.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index af12665a018..213bc9f9bc5 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -313,14 +313,14 @@ Stacked bar charts are a powerful way to present results summarizing categories from plotly import graph_objects as go import pandas as pd -#get one year of gapminder data +# Get one year of gapminder data url = 'https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv' df = pd.read_csv(url) df = df[df['year']==2007] df["gdp"]=df["pop"]*df['gdpPercap'] -#build the summary of interest +# Build the summary of interest df_summarized = df.groupby("continent", observed=True).agg("sum").reset_index() df_summarized["percent of world population"]=100*df_summarized["pop"]/df_summarized["pop"].sum() @@ -332,17 +332,17 @@ df = df_summarized[["continent", "percent of world GDP", ]] -#we now have a wide data frame, but it's in the opposite orientation from the one that px is designed to deal with. -#transposing it and rebuilding the indexes is an option, but iterating through the DF using graph objects is more succinct. +# We now have a wide data frame, but it's in the opposite orientation from the one that px is designed to deal with. +# Transposing it and rebuilding the indexes is an option, but iterating through the DF using graph objects is more succinct. 
fig=go.Figure() for category in df_summarized["continent"].values: fig.add_trace(go.Bar( x=df.columns[1:], - #we need to get a pandas series that contains just the values to graph; - #we do so by selecting the right row, selecting the right columns - #and then tranposing and using iloc to convert to a series - #here, I assume that the bar element category variable is in column 0 + # We need to get a pandas series that contains just the values to graph; + # We do so by selecting the right row, selecting the right columns + # and then transposing and using iloc to convert to a series + # Here, we assume that the bar element category variable is in column 0 y=list(df.loc[df["continent"]==category][list(df.columns[1:])].transpose().iloc[:,0]), name=str(category) From c3fbb647d6f4fe6ef5047927a6f54e4c2c525bd1 Mon Sep 17 00:00:00 2001 From: Liam Connors Date: Wed, 10 Apr 2024 09:34:07 -0400 Subject: [PATCH 6/6] Update doc/python/bar-charts.md --- doc/python/bar-charts.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/python/bar-charts.md b/doc/python/bar-charts.md index 213bc9f9bc5..635a348d241 100644 --- a/doc/python/bar-charts.md +++ b/doc/python/bar-charts.md @@ -306,8 +306,7 @@ fig.show() ### Stacked Bar Chart From Aggregating a DataFrame -Stacked bar charts are a powerful way to present results summarizing categories generated using the Pandas aggregate commands. DF.agg() which produces a wide format data set with one row for each bar component and a column for each bar, which is the transpose of the orientation of the px.bar wide data frame. Tranposing and updating the indexes is a somewhat involved option. Here is one straightforward way to aggregate a data set into a summarized form and present the results as a stacked bar. - +Stacked bar charts are a powerful way to present results summarizing categories generated using the Pandas aggregate commands. `pandas.DataFrame.agg` produces a wide data set format incompatible with `px.bar`. 
Transposing and updating the indexes to achieve `px.bar` compatibility is a somewhat involved option. Here is one straightforward alternative, which presents the aggregated data as a stacked bar chart using `plotly.graph_objects`. ``` from plotly import graph_objects as go