Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion examples/energy_ratio/wake_steering_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
from flasc.energy_ratio import energy_ratio_suite
# from flasc import floris_tools as fsatools

from flasc.visualization import plot_layout_with_waking_directions
from flasc.visualization import plot_layout_with_waking_directions, plot_binned_mean_and_ci



if __name__ == "__main__":
Expand Down Expand Up @@ -119,6 +120,15 @@
'pow_002':power_wakesteering_downstream
})

# Use the function plot_binned_mean_and_ci to show the noise in wind speed
fig, ax = plt.subplots(1,1,sharex=True)
plot_binned_mean_and_ci(df_baseline.ws, df_baseline_noisy.ws, ax=ax)
ax.set_xlabel('Wind Speed (m/s) [Baseline]')
ax.set_ylabel('Wind Speed (m/s) [Baseline (Noisy)]')
ax.grid(True)



# Make a color palette that visually links the nominal and noisy data sets together
color_palette = sns.color_palette("Paired",4)[::-1]
# color_palette = ['r','g','b','k']
Expand Down
129 changes: 127 additions & 2 deletions flasc/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import scipy.stats as st


def plot_with_wrapping(
Expand Down Expand Up @@ -637,14 +639,137 @@ def put_label(i):
raise ValueError("Need one of near_i, near_x, near_y")


def plot_binned_mean_and_ci(
x,
y,
color="b",
label="_nolegend_",
x_edges=None,
ax=None,
show_scatter=True,
show_bin_points=True,
show_confidence=True,
alpha_scatter=0.1,
confidence_level = 0.95,
):
"""
Plot the binned mean of y versus x on a single axis. The method
has options to include a scatter of the underlying data, to specify
the bin edges, and to plot a confidence interval of the mean.

Args:
x (np.array): abscissa data.
y (np.array): ordinate data.
color (str, optional): line color.
Defaults to 'b'.
label (str, optional): line label used in legend.
Defaults to '_nolegend_'.
x_edges (np.array, optional): bin edges in x data
Defaults to None.
ax (:py:class:`matplotlib.pyplot.axes`, optional):
axes handle for plotting. Defaults to None.
show_scatter (bool, optional): flag to control scatter plot.
Defaults to True.
show_bin_points (bool, optional): flag to control plot of bins.
Defaults to True.
show_confidence (bool, optional): flag to control plot of
confidence interval. Defaults to True.
alpha_scatter (float, optional): Alpha for scatter
plot. Defaults to 0.1.
confidence_level (float, optional): Confidence level for
confidence interval. Defaults to 0.95.

"""

# Check the length of x equals length of y
if len(x) != len(y):
raise ValueError("x and y must be the same length")

# Check that x is not empty
if len(x) == 0:
raise ValueError("x is empty")



# Declare ax if not provided
if ax is None:
_, ax = plt.subplots()

# Put points into dataframe
df = pd.DataFrame({"x": x, "y": y})

# If x_edges not provided, use 50 evenly spaced edges (49 bins) from 0.98*min(x) to 1.02*max(x)
if x_edges is None:
x_edges = np.linspace(df["x"].min()*.98, df["x"].max()*1.02, 50)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@paulf81 I changed the lower and upper edges from min*0.95, max*1.05 to min*0.98, max*1.02 to avoid numpy warnings that get raised because, combined with the 50 bins, there will always be empty bins. An alternative, if you'd prefer, is to leave the edges at 0.95 and 1.05 but lower the bin count to 20, i.e.

x_edges = np.linspace(df["x"].min()*.95, df["x"].max()*1.05, 20)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do see that the warnings that I was trying to avoid are still being raised in the example, I think because the data isn't really spread along the x axis in a realistic way. For now, I don't think that's really a problem.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are using the example dataset, absolutely the data is mostly in really low wind speeds, indeed unrealistic. This has been an annoyance for me while working with this data, for sure. I wasn't sure if we wanted to fix this or just wait for the revision of the examples to finish.



# Define x_labels as bin centers
x_labels = (x_edges[1:] + x_edges[:-1]) / 2.0

# Bin data
df["x_bin"] = pd.cut(df["x"], x_edges, labels=x_labels)

# Get aggregate statistics
df_agg = df.groupby("x_bin").agg(
{"y": ["count", "std", "min", "max", "mean", st.sem]}
)
# Flatten column names
df_agg.columns = ["_".join(c) for c in df_agg.columns]

# Reset the index
df_agg = df_agg.reset_index()

# Delete rows with no data
df_agg = df_agg[df_agg["y_count"] > 0]

# Add the confidence interval of the mean to df_agg
df_agg["y_ci_lower"], df_agg["y_ci_upper"] = st.t.interval(
confidence_level,
df_agg["y_count"]-1,
loc=df_agg["y_mean"],
scale=df_agg["y_sem"]
)
Comment on lines +724 to +729
Copy link
Collaborator

@misi9170 misi9170 Mar 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@paulf81 I think what you have, using the t-distribution, is good (it's a little more correct than the z-score approach, which assumes the data set is large enough that the means can be treated as normal---the t-distribution doesn't make that assumption, and approaches the z-score method as the number of samples becomes large). I've rearranged the code a little in my commits, but have not changed the method.


# Plot the mean values
ax.plot(df_agg.x_bin, df_agg.y_mean, color=color, label=label)

# Plot the confidence interval
if show_confidence:
ax.fill_between(
df_agg.x_bin,
df_agg.y_ci_lower,
df_agg.y_ci_upper,
color=color,
alpha=0.2,
)

# Plot dashed lines at the confidence interval bounds
ax.plot(
df_agg.x_bin,
df_agg.y_ci_lower,
color=color,
alpha=0.2,
ls="--",
)
ax.plot(
df_agg.x_bin,
df_agg.y_ci_upper,
color=color,
alpha=0.2,
ls="--",
)

# Plot the scatter points
if show_scatter:
ax.scatter(df.x, df.y, color=color, s=10, alpha=alpha_scatter)

# Plot the bin points, scaled by the counts
if show_bin_points:
ax.scatter(
df_agg.x_bin,
df_agg.y_mean,
color=color,
s=df_agg.y_count / df_agg.y_count.max() * 20,
alpha=0.5,
marker='s'
)

return ax