Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reference suitability #11

Merged
merged 11 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 139 additions & 55 deletions examples/smarteole_example.ipynb

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ include = ["wind_up*"]
line-length = 120
target-version = "py310"
show-fixes = true
extend-exclude = ["tests"]

[tool.ruff.lint]
select = ["ALL"] # https://beta.ruff.rs/docs/rules/
Expand Down
21 changes: 12 additions & 9 deletions tests/test_conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,18 @@
SAMPLE_OTHER = pd.date_range(pd.Timestamp("2000-01-01T05:00+05:00"), periods=2)


@pytest.mark.parametrize("t,exp", [
(pd.DatetimeIndex(SAMPLE_NAIVE), pd.DatetimeIndex(SAMPLE_UTC)),
(pd.DatetimeIndex(SAMPLE_UTC), pd.DatetimeIndex(SAMPLE_UTC)),
(pd.DatetimeIndex(SAMPLE_OTHER), pd.DatetimeIndex(SAMPLE_UTC)),
(pd.Timestamp("2000-01-01T00:00"), pd.Timestamp("2000-01-01T00:00+00:00")),
(pd.Timestamp("2000-01-01T00:00+00:00"), pd.Timestamp("2000-01-01T00:00+00:00")),
(pd.Timestamp("2000-01-01T05:00+05:00"), pd.Timestamp("2000-01-01T00:00+00:00")),
])
def test_ensure_utc(t, exp):
@pytest.mark.parametrize(
("t", "exp"),
[
(pd.DatetimeIndex(SAMPLE_NAIVE), pd.DatetimeIndex(SAMPLE_UTC)),
(pd.DatetimeIndex(SAMPLE_UTC), pd.DatetimeIndex(SAMPLE_UTC)),
(pd.DatetimeIndex(SAMPLE_OTHER), pd.DatetimeIndex(SAMPLE_UTC)),
(pd.Timestamp("2000-01-01T00:00"), pd.Timestamp("2000-01-01T00:00+00:00")),
(pd.Timestamp("2000-01-01T00:00+00:00"), pd.Timestamp("2000-01-01T00:00+00:00")),
(pd.Timestamp("2000-01-01T05:00+05:00"), pd.Timestamp("2000-01-01T00:00+00:00")),
],
)
def test_ensure_utc(t: pd.Timestamp, exp: pd.Timestamp) -> None:
actual = ensure_utc(t)

if isinstance(t, pd.Timestamp):
Expand Down
4 changes: 2 additions & 2 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class TestWithParquetCache:
def test_creates_and_fetches_cache(self, tmp_path: Path):
def test_creates_and_fetches_cache(self, tmp_path: Path) -> None:
fp = tmp_path / "test.parquet"
sample_df = pd.DataFrame({"a": [1, 2, 3]})

Expand All @@ -22,7 +22,7 @@ def myfunc() -> pd.DataFrame:
df2 = myfunc()
pd.testing.assert_frame_equal(df2, sample_df)

def test_doesnt_run_the_func_if_file_exists(self, tmp_path: Path):
def test_doesnt_run_the_func_if_file_exists(self, tmp_path: Path) -> None:
fp = tmp_path / "test.parquet"
sample_df = pd.DataFrame({"a": [1, 2, 3]})
sample_df.to_parquet(fp)
Expand Down
8 changes: 6 additions & 2 deletions tests/test_northing.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ def test_apply_northing_corrections(test_lsa_t13_config: WindUpConfig) -> None:
assert median_yaw_after_northing["LSA_T13"] == pytest.approx(235.22855377197266)
assert median_yaw_after_northing["LSA_T14"] == pytest.approx(224.92881774902344)

abs_north_errs_before_northing = calc_max_abs_north_errs(wf_df, north_ref_wd_col=REANALYSIS_WD_COL,timebase_s=cfg.timebase_s)
abs_north_errs_after_northing = calc_max_abs_north_errs(wf_df_after_northing, north_ref_wd_col=REANALYSIS_WD_COL,timebase_s=cfg.timebase_s)
abs_north_errs_before_northing = calc_max_abs_north_errs(
wf_df, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s
)
abs_north_errs_after_northing = calc_max_abs_north_errs(
wf_df_after_northing, north_ref_wd_col=REANALYSIS_WD_COL, timebase_s=cfg.timebase_s
)
assert abs_north_errs_before_northing.min() == pytest.approx(7.88920288085938)
assert abs_north_errs_before_northing.max() == pytest.approx(7.88920288085938)
assert abs_north_errs_after_northing["LSA_T07"] == pytest.approx(18.400006103515604)
Expand Down
22 changes: 19 additions & 3 deletions tests/test_pp_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -> None:
cfg = test_lsa_t13_config

test_wtg = next(x for x in cfg.asset.wtgs if x.name=="LSA_T13")
test_wtg = next(x for x in cfg.asset.wtgs if x.name == "LSA_T13")
ref_name = "LSA_T12"
detrend_ws_col = "ref_ws_detrended"
test_pw_col = "test_pw_clipped"
Expand Down Expand Up @@ -39,9 +39,25 @@ def test_pre_post_pp_analysis_with_reversal(test_lsa_t13_config: WindUpConfig) -
# minor changes to make actual_df compatible with old expected_df
expected_df["hours_for_mwh_calc"] = expected_df["hours_per_year"]
expected_df["hours_per_year"] = actual_df["hours_per_year"]
cols_with_new_calc = ["uplift_se", "uplift_p5_kw", "uplift_p95_kw"]
cols_with_new_calc = ["uplift_kw_se", "uplift_p5_kw", "uplift_p95_kw"]
expected_df[cols_with_new_calc] = actual_df[cols_with_new_calc]
new_cols = ["pre_valid", "post_valid", "hours_pre_raw", "hours_post_raw", "is_invalid_bin",'pw_at_mid_expected', 'pw_sem_at_mid_expected']
new_cols = [
"pre_valid",
"post_valid",
"hours_pre_raw",
"hours_post_raw",
"is_invalid_bin",
"pw_at_mid_expected",
"pw_sem_at_mid_expected",
"relative_cp_baseline",
"relative_cp_post",
"relative_cp_sem_at_mid_expected",
"relative_cp_sem_at_mid_post",
"uplift_relative_cp",
"uplift_relative_cp_se",
"uplift_relative_cp_p5",
"uplift_relative_cp_p95",
]
expected_df[new_cols] = actual_df[new_cols]
expected_df = expected_df[actual_df.columns]

Expand Down
1 change: 1 addition & 0 deletions tests/test_reanalysis_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
get_dsid_and_dates_from_filename,
)


def test_get_dsid_and_dates_from_filename() -> None:
assert get_dsid_and_dates_from_filename("ERA5T_47.50N_-3.25E_100m_1hr_19900101_20231031.parquet") == (
"ERA5T_47.50N_-3.25E_100m_1hr",
Expand Down
5 changes: 4 additions & 1 deletion tests/test_smart_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
add_smart_lat_long_to_cfg,
calc_last_xmin_datetime_in_month,
calc_month_list_and_time_info,
check_and_convert_scada_raw,
load_smart_md_from_file,
load_smart_scada_and_md_from_file,
)

TIMEBASE_PD_TIMEDELTA = pd.Timedelta("10min")


def test_calc_last_xmin_datetime_in_month() -> None:
inputs = [
dt.datetime(2020, 1, 13),
Expand All @@ -34,6 +35,7 @@ def test_calc_last_xmin_datetime_in_month() -> None:
for i, e in zip(inputs, expected, strict=True):
assert calc_last_xmin_datetime_in_month(i, TIMEBASE_PD_TIMEDELTA) == pd.Timestamp(e)


def test_add_smart_lat_long_to_cfg(test_marge_config: WindUpConfig) -> None:
cfg = test_marge_config
md_df = load_smart_md_from_file(asset_name="Marge Wind Farm", test_mode=True)
Expand Down Expand Up @@ -62,6 +64,7 @@ def test_calc_month_list_and_time_info() -> None:
assert smart_tz == "UTC"
assert smart_tf == "End"


def test_load_smart_scada_and_md_from_file() -> None:
test_data_dir = TEST_DATA_FLD / "smart_data" / "Marge Wind Farm"
first_datetime_utc_start = pd.Timestamp("2020-02-26 23:50:00", tz="UTC")
Expand Down
83 changes: 80 additions & 3 deletions wind_up/main_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
REANALYSIS_WD_COL,
REANALYSIS_WS_COL,
WINDFARM_YAWDIR_COL,
DataColumns,
)
from wind_up.detrend import apply_wsratio_v_wd_scen, calc_wsratio_v_wd_scen, check_applied_detrend
from wind_up.interface import AssessmentInputs, add_toggle_signals
Expand Down Expand Up @@ -362,6 +363,59 @@ def yaw_offset_results(
return results


def check_for_ops_curve_shift(
pre_df: pd.DataFrame,
post_df: pd.DataFrame,
*,
wtg_name: str,
ws_col: str,
pw_col: str,
rpm_col: str,
pt_col: str,
) -> dict[str, float]:
results_dict = {
"powercurve_shift": np.nan,
"rpm_shift": np.nan,
"pitch_shift": np.nan,
}
# check if all required columns are present
required_cols = [ws_col, pw_col, pt_col, rpm_col]
for req_col in required_cols:
if req_col not in pre_df.columns:
msg = f"check_for_ops_curve_shift {wtg_name} pre_df missing required column {req_col}"
result_manager.warning(msg)
return results_dict
if req_col not in post_df.columns:
msg = f"check_for_ops_curve_shift {wtg_name} post_df missing required column {req_col}"
result_manager.warning(msg)
return results_dict
pre_dropna_df = pre_df.dropna(subset=[ws_col, pw_col, pt_col, rpm_col]).copy()
post_dropna_df = post_df.dropna(subset=[ws_col, pw_col, pt_col, rpm_col]).copy()

for descr, x_var, y_var, x_bin_width, warn_thresh in [
("powercurve_shift", ws_col, pw_col, 1, 0.01),
("rpm_shift", pw_col, rpm_col, 0, 0.005),
("pitch_shift", ws_col, pt_col, 1, 0.1),
]:
bins = np.arange(0, pre_dropna_df[x_var].max() + x_bin_width, x_bin_width) if x_bin_width > 0 else 10
mean_curve = pre_dropna_df.groupby(pd.cut(pre_dropna_df[x_var], bins=bins, retbins=False), observed=True).agg(
x_mean=pd.NamedAgg(column=x_var, aggfunc="mean"),
y_mean=pd.NamedAgg(column=y_var, aggfunc="mean"),
)
post_dropna_df["expected_y"] = np.interp(post_dropna_df[x_var], mean_curve["x_mean"], mean_curve["y_mean"])
mean_df = post_dropna_df.mean()
if y_var == pt_col:
results_dict[descr] = mean_df[y_var] - mean_df["expected_y"]
else:
results_dict[descr] = (mean_df[y_var] / mean_df["expected_y"] - 1).clip(-1, 1)
if abs(results_dict[descr]) > warn_thresh:
result_manager.warning(
f"{wtg_name} check_for_ops_curve_shift abs {descr} > {warn_thresh}: {results_dict[descr]}"
)

return results_dict


def calc_test_ref_results(
*,
test_df: pd.DataFrame,
Expand Down Expand Up @@ -505,6 +559,16 @@ def calc_test_ref_results(
pre_df = pre_df.merge(ref_df, how="left", left_index=True, right_index=True)
post_df = post_df.merge(ref_df, how="left", left_index=True, right_index=True)

ref_ops_curve_shift_dict = check_for_ops_curve_shift(
pre_df,
post_df,
wtg_name=ref_name,
ws_col=ref_ws_col,
pw_col=ref_pw_col,
rpm_col=f"ref_{DataColumns.gen_rpm_mean}",
pt_col=f"ref_{DataColumns.pitch_angle_mean}",
)

pre_df = add_waking_scen(
test_ref_df=pre_df,
test_name=test_name,
Expand Down Expand Up @@ -608,9 +672,6 @@ def calc_test_ref_results(
toggle_name=cfg.toggle.name if cfg.toggle else None,
)

pre_df.to_parquet(cfg.out_dir / f"{test_wtg.name}_{ref_name}_pre_df.parquet")
post_df.to_parquet(cfg.out_dir / f"{test_wtg.name}_{ref_name}_post_df.parquet")

pp_results, pp_df = pre_post_pp_analysis_with_reversal_and_bootstrapping(
cfg=cfg,
test_wtg=test_wtg,
Expand All @@ -634,6 +695,9 @@ def calc_test_ref_results(
"ref_max_northing_error_v_wf": ref_max_northing_error_v_wf,
"ref_max_ws_drift": ref_max_ws_drift,
"ref_max_ws_drift_pp_period": ref_max_ws_drift_pp_period,
"ref_powercurve_shift": ref_ops_curve_shift_dict["powercurve_shift"],
"ref_rpm_shift": ref_ops_curve_shift_dict["rpm_shift"],
"ref_pitch_shift": ref_ops_curve_shift_dict["pitch_shift"],
"detrend_pre_r2_improvement": detrend_pre_r2_improvement,
"detrend_post_r2_improvement": detrend_post_r2_improvement,
"mean_power_pre": pre_df.dropna(subset=[detrend_ws_col, test_pw_col, ref_wd_col])[test_pw_col].mean(),
Expand Down Expand Up @@ -730,6 +794,16 @@ def run_wind_up_analysis(

test_df, pre_df, post_df = pre_post_splitter.split(test_df, test_wtg_name=test_name)

test_ops_curve_shift_dict = check_for_ops_curve_shift(
pre_df,
post_df,
wtg_name=test_name,
ws_col=test_ws_col,
pw_col=test_pw_col,
rpm_col=f"test_{DataColumns.gen_rpm_mean}",
pt_col=f"test_{DataColumns.pitch_angle_mean}",
)

# compare ops curves of pre_df and post_df
compare_ops_curves_pre_post(
pre_df=pre_df,
Expand All @@ -747,6 +821,9 @@ def run_wind_up_analysis(
"lt_wtg_hours_filt": lt_wtg_df_filt["observed_hours"].sum() if lt_wtg_df_filt is not None else 0,
"test_max_ws_drift": test_max_ws_drift,
"test_max_ws_drift_pp_period": test_max_ws_drift_pp_period,
"test_powercurve_shift": test_ops_curve_shift_dict["powercurve_shift"],
"test_rpm_shift": test_ops_curve_shift_dict["rpm_shift"],
"test_pitch_shift": test_ops_curve_shift_dict["pitch_shift"],
"preprocess_warning_counts": preprocess_warning_counts,
"test_warning_counts": len(result_manager.stored_warnings),
}
Expand Down
20 changes: 11 additions & 9 deletions wind_up/plots/combine_results_plots.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import norm

from wind_up.models import PlotConfig


def plot_combine_results(trdf: pd.DataFrame, tdf: pd.DataFrame, plot_cfg: PlotConfig) -> None:
def plot_combine_results(trdf: pd.DataFrame, tdf: pd.DataFrame, plot_cfg: PlotConfig, confidence: float = 0.9) -> None:
show_plots = plot_cfg.show_plots
save_plots = plot_cfg.save_plots
z_score = norm.ppf((1 + confidence) / 2)

plt.figure(figsize=(10, 6))
labels = [
Expand All @@ -18,14 +20,14 @@ def plot_combine_results(trdf: pd.DataFrame, tdf: pd.DataFrame, plot_cfg: PlotCo
)
]
values = trdf["uplift_frc"] * 100
yerrs = trdf["unc_one_sigma_frc"] * 100
yerrs = trdf["unc_one_sigma_frc"] * 100 * z_score
plt.bar(labels, values, yerr=yerrs, capsize=3)
plt.xlabel("turbine")
plt.ylabel("uplift [%] and 1-sigma uncertainty")
plot_title = "uplift by test-ref pair"
plt.ylabel("uplift [%]")
plot_title = f"uplift and {confidence*100:.0f}% CI by test-ref pair"
plt.title("uplift by test-ref pair")
plt.grid(axis="y")
plt.xticks(rotation=45, ha="right")
plt.xticks(rotation=90, ha="right")
plt.tight_layout()
if show_plots:
plt.show()
Expand All @@ -35,14 +37,14 @@ def plot_combine_results(trdf: pd.DataFrame, tdf: pd.DataFrame, plot_cfg: PlotCo
plt.figure()
labels = tdf["test_wtg"]
values = tdf["p50_uplift"] * 100
yerrs = tdf["sigma"] * 100
yerrs = tdf["sigma"] * 100 * z_score
plt.bar(labels, values, yerr=yerrs, capsize=3)
plt.xlabel("turbine")
plt.ylabel("uplift [%] and 1-sigma uncertainty")
plot_title = "combined uplift"
plt.ylabel("uplift [%]")
plot_title = f"combined uplift and {confidence*100:.0f}% CI"
plt.title(plot_title)
plt.grid(axis="y")
plt.xticks(rotation=45, ha="right")
plt.xticks(rotation=90, ha="right")
plt.tight_layout()
if show_plots:
plt.show()
Expand Down
Loading
Loading