16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,22 @@
# Changelog
All notable changes to this project will be documented in this file. If you make a notable change to the project, please add a line describing the change to the "unreleased" section. The maintainers will make an effort to keep the [Github Releases](https://github.com/NREL/OpenOA/releases) page up to date with this changelog. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## Unreleased

- Features and updates:
- `MonteCarloAEP` updates
- Add an `n_jobs` input to the Monte Carlo AEP method to allow for the underlying models to be
parallelized during each iteration for faster ML model computation.
- Add an `apply_iav` input to the Monte Carlo AEP analysis method to toggle the addition of the
IAV factor at the end of the analysis.
- Add a `progress_bar` flag to `MonteCarloAEP.run()` to allow for turning the
simulation's default progress bar on or off.
- Implement missing `compute_wind_speed` in `openoa/utils/met_data_processing.py` and apply it to
the `PlantData` reanalysis validation steps in place of the manual calculation.
- Fixes:
- Add a default value for `PlantData`'s `asset_distance_matrix` and `asset_direction_matrix` to
ensure projects not utilizing location data are compatible.

## v3.1.3 - 2025-01-31

- Pin SciPy to >= 1.7 and <1.14 to avoid an incompatibility error with PyGAM.
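The `Unreleased` entries above add three user-facing options to the Monte Carlo AEP workflow. A minimal, hedged usage sketch: `project` is assumed to be an already-validated `PlantData` object with meter, curtailment, and reanalysis data, and `run()` is assumed to accept `num_sim` as in prior releases; all other arguments keep their defaults.

```python
# Hedged usage sketch of the new n_jobs, apply_iav, and progress_bar options.
from openoa.analysis import MonteCarloAEP

analysis = MonteCarloAEP(
    project,            # assumed: a validated PlantData object
    reg_model="etr",    # an ML regression, where n_jobs actually provides a speedup
    n_jobs=-1,          # parallelize the underlying scikit-learn models
    apply_iav=False,    # skip the IAV adjustment for long-term comparisons
)
analysis.run(num_sim=1000, progress_bar=False)  # suppress the tqdm progress bar
print(analysis.results.describe())
```

Leaving `apply_iav` at its default of `True` keeps the previous behavior, in which the IAV adjustment is always applied.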
51 changes: 43 additions & 8 deletions openoa/analysis/aep.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import sys
import random
import datetime
from copy import deepcopy
@@ -10,8 +11,8 @@
import numpy.typing as npt
import statsmodels.api as sm
import matplotlib.pyplot as plt
from tqdm import tqdm
from attrs import field, define
from tqdm.auto import tqdm, trange
from sklearn.metrics import r2_score, mean_squared_error
from matplotlib.markers import MarkerStyle
from sklearn.linear_model import LinearRegression
@@ -115,6 +116,14 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin):
points. Defaults to "lin".
ml_setup_kwargs(:obj:`kwargs`): Keyword arguments to
:py:class:`openoa.utils.machine_learning_setup.MachineLearningSetup` class. Defaults to {}.
n_jobs(:obj:`int` | :obj:`None`): The number of jobs to use for the computation in the
scikit-learn models. This only provides a speedup for sufficiently large problems.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context, and ``-1`` means
using all processors. Defaults to ``None``.
apply_iav(:obj:`bool`): Toggles the application of the interannual variability (IAV)
adjustment at the end of the simulation. Including the IAV adjustment is useful for
comparing against short-term estimates of energy production, whereas excluding it is
useful for comparing against long-term energy production estimates. Defaults to ``True``.
"""

plant: PlantData = field(converter=deepcopy, validator=attrs.validators.instance_of(PlantData))
@@ -169,6 +178,10 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin):
default="lin", converter=str, validator=attrs.validators.in_(("lin", "gbm", "etr", "gam"))
)
ml_setup_kwargs: dict = field(default={}, converter=dict)
n_jobs: int | None = field(
default=None, validator=attrs.validators.instance_of((int, type(None)))
)
apply_iav: bool = field(default=True, validator=attrs.validators.instance_of(bool))

# Internally created attributes need to be given a type before usage
resample_freq: str = field(init=False)
@@ -285,6 +298,7 @@ def run(
time_resolution: str = None,
end_date_lt: str | pd.Timestamp | None = None,
ml_setup_kwargs: dict = None,
progress_bar: bool = True,
) -> None:
"""
Process all appropriate data and run the MonteCarlo AEP analysis.
@@ -324,6 +338,8 @@
points. Defaults to "lin".
ml_setup_kwargs(:obj:`kwargs`): Keyword arguments to
:py:class:`openoa.utils.machine_learning_setup.MachineLearningSetup` class. Defaults to {}.
progress_bar(:obj:`bool`): Flag to use a progress bar for the iterations in the AEP
calculation. Defaults to ``True``.

Returns:
None
@@ -382,7 +398,7 @@ def run(
# Start the computation
self.calculate_long_term_losses()
self.setup_monte_carlo_inputs()
self.results = self.run_AEP_monte_carlo()
self.results = self.run_AEP_monte_carlo(progress_bar=progress_bar)

# Log the completion of the run
logger.info("Run completed")
@@ -749,6 +765,10 @@ def filter_outliers(self, n):
& (~df["nan_flag"]),
:,
]
if df_sub.size == 0:
raise ValueError(
"The `uncertainty_loss_max` is too low for the data or there are too many NaN values."
)

# Set maximum range for using bin-filter, convert from MW to GWh
plant_capac = self.plant.metadata.capacity / 1000.0 * self.resample_hours
@@ -917,7 +937,9 @@ def run_regression(self, n):
# Run regression. Note, the last column of reg_data is the target variable for the regression
# Linear regression
if self.reg_model == "lin":
reg = LinearRegression().fit(np.array(reg_data[:, 0:-1]), reg_data[:, -1])
reg = LinearRegression(n_jobs=self.n_jobs).fit(
np.array(reg_data[:, 0:-1]), reg_data[:, -1]
)
predicted_y = reg.predict(np.array(reg_data[:, 0:-1]))

self._mc_slope[n, :] = reg.coef_
Expand Down Expand Up @@ -946,6 +968,7 @@ def run_regression(self, n):
report=False,
cv=KFold(n_splits=5),
verbose=verbosity,
n_jobs=self.n_jobs,
)
# Store optimized hyperparameters for each reanalysis product
self.opt_model[(self._run.reanalysis_product)] = ml.opt_model
@@ -959,10 +982,14 @@
return self.opt_model[(self._run.reanalysis_product)]

@logged_method_call
def run_AEP_monte_carlo(self):
def run_AEP_monte_carlo(self, progress_bar: bool = True):
"""
Loop through the OA process a number of times and return an array of AEP results from each iteration

Args:
progress_bar(:obj:`bool`): Flag to use a progress bar for the iterations in the AEP
calculation. Defaults to ``True``.

Returns:
:obj:`numpy.ndarray` Array of AEP, long-term avail, long-term curtailment calculations
"""
@@ -991,7 +1018,8 @@ def run_AEP_monte_carlo(self):
iav = np.empty(num_sim)

# Loop through number of simulations, run regression each time, store AEP results
for n in tqdm(np.arange(num_sim)):
_range = trange(num_sim) if progress_bar else np.arange(num_sim)
for n in _range:
self._run = self.mc_inputs.loc[n]

# Run regression
@@ -1075,9 +1103,10 @@ def run_AEP_monte_carlo(self):
iav_avg = iav.mean()

# Apply IAV to AEP from single MC iterations
iav_nsim = np.random.normal(1, iav_avg, self.num_sim)
aep_GWh = aep_GWh * iav_nsim
lt_por_ratio = lt_por_ratio * iav_nsim
if self.apply_iav:
iav_nsim = np.random.normal(1, iav_avg, self.num_sim)
aep_GWh = aep_GWh * iav_nsim
lt_por_ratio = lt_por_ratio * iav_nsim

# Return final output
sim_results = pd.DataFrame(
@@ -1534,6 +1563,8 @@ def plot_aep_boxplot(
__defaults_ml_setup_kwargs = MonteCarloAEP.__attrs_attrs__.ml_setup_kwargs.default
__defaults_reg_temperature = MonteCarloAEP.__attrs_attrs__.reg_temperature.default
__defaults_reg_wind_direction = MonteCarloAEP.__attrs_attrs__.reg_wind_direction.default
__defaults_n_jobs = MonteCarloAEP.__attrs_attrs__.n_jobs.default
__defaults_apply_iav = MonteCarloAEP.__attrs_attrs__.apply_iav.default


def create_MonteCarloAEP(
@@ -1552,6 +1583,8 @@
ml_setup_kwargs: dict = __defaults_ml_setup_kwargs,
reg_temperature: bool = __defaults_reg_temperature,
reg_wind_direction: bool = __defaults_reg_wind_direction,
n_jobs: int | None = __defaults_n_jobs,
apply_iav: bool = __defaults_apply_iav,
) -> MonteCarloAEP:
return MonteCarloAEP(
plant=project,
@@ -1569,6 +1602,8 @@
ml_setup_kwargs=ml_setup_kwargs,
reg_temperature=reg_temperature,
reg_wind_direction=reg_wind_direction,
n_jobs=n_jobs,
apply_iav=apply_iav,
)


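For intuition about the now-optional IAV block in `run_AEP_monte_carlo` above, here is a small synthetic illustration (all numbers are invented, not from any plant): each simulated AEP value is scaled by a draw from a normal distribution centered on 1 with the mean IAV as its standard deviation, which widens the spread of the reported AEP distribution.

```python
# Synthetic illustration of the apply_iav scaling idiom shown in the diff above.
import numpy as np

rng = np.random.default_rng(42)
aep_GWh = rng.normal(loc=350.0, scale=5.0, size=10_000)  # mock Monte Carlo AEP samples
iav_avg = 0.05                                           # mock 5% interannual variability

iav_nsim = rng.normal(1.0, iav_avg, aep_GWh.size)        # same scaling as the source code
aep_with_iav = aep_GWh * iav_nsim

print(f"AEP std without IAV: {aep_GWh.std():.1f} GWh")       # ~5 GWh
print(f"AEP std with IAV:    {aep_with_iav.std():.1f} GWh")  # ~18 GWh, roughly sqrt(5**2 + (0.05 * 350)**2)
```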
6 changes: 3 additions & 3 deletions openoa/plant.py
@@ -443,8 +443,8 @@ class PlantData:
default={"missing": {}, "dtype": {}, "frequency": {}, "attributes": []}, init=False
)
eia: dict = field(default={}, init=False)
asset_distance_matrix: pd.DataFrame = field(init=False)
asset_direction_matrix: pd.DataFrame = field(init=False)
asset_distance_matrix: pd.DataFrame = field(init=False, default=pd.DataFrame([]))
asset_direction_matrix: pd.DataFrame = field(init=False, default=pd.DataFrame([]))

def __attrs_post_init__(self):
"""Post-initialization hook."""
@@ -1043,7 +1043,7 @@ def _calculate_reanalysis_columns(self) -> None:

ws = col_map["WMETR_HorWdSpd"]
if ws not in df and has_u_v:
df[ws] = np.sqrt(df[u].values ** 2 + df[v].values ** 2)
df[ws] = met.compute_wind_speed(df[u], df[v]).values

wd = col_map["WMETR_HorWdDir"]
if wd not in df and has_u_v:
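The `_calculate_reanalysis_columns` change above swaps the inline magnitude calculation for the new helper. A quick sketch with made-up u/v components, confirming the two forms are interchangeable:

```python
# Made-up wind components; the manual calculation that was removed and the new
# met_data_processing helper return the same wind speed magnitudes.
import numpy as np
import pandas as pd
from openoa.utils import met_data_processing as met

df = pd.DataFrame({"u": [3.0, -1.5, 0.0], "v": [4.0, 2.0, -2.5]})
manual = np.sqrt(df["u"].values ** 2 + df["v"].values ** 2)
helper = met.compute_wind_speed(df["u"], df["v"]).values
assert np.allclose(manual, helper)  # both give [5.0, 2.5, 2.5]
```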
5 changes: 5 additions & 0 deletions openoa/utils/machine_learning_setup.py
@@ -167,6 +167,7 @@ def hyper_optimize(
n_iter_search: int = 20,
report: bool = True,
verbose: int = 0,
n_jobs: int | None = None,
) -> None:
"""
Optimize hyperparameters through cross-validation
@@ -186,6 +187,9 @@
- >1 : the computation time for each fold and parameter candidate is displayed;
- >2 : the score is also displayed;
- >3 : the fold and candidate parameter indexes are also displayed together with the starting time of the computation.
n_jobs(:obj:`int` | :obj:`None`): The number of jobs to use for the computation in the
scikit-learn model. This only provides a speedup for sufficiently large problems.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context, and ``-1`` means
using all processors. Defaults to ``None``.

Returns:
(none)
@@ -199,6 +203,7 @@
scoring=self.my_scorer,
verbose=0,
return_train_score=True,
n_jobs=n_jobs,
)
# Fit the model to each combination of hyperparameters
self.random_search.fit(X, y)
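To show the new `n_jobs` argument threading through to the cross-validated search, here is a hedged, self-contained sketch. The synthetic regression data, the positional `"etr"` constructor argument, and the positional `X, y` ordering are assumptions for illustration; the keyword names match the `hyper_optimize` signature in the diff.

```python
# Hedged sketch: synthetic data and assumed constructor usage; keyword names follow the diff.
import numpy as np
from sklearn.model_selection import KFold
from openoa.utils.machine_learning_setup import MachineLearningSetup

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=500)

ml = MachineLearningSetup("etr")  # extra trees regressor, as used by MonteCarloAEP
ml.hyper_optimize(
    X,
    y,
    cv=KFold(n_splits=5),
    n_iter_search=20,
    report=False,
    verbose=0,
    n_jobs=-1,  # run the cross-validated hyperparameter search on all processors
)
print(ml.opt_model)  # best estimator found by the randomized search
```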
28 changes: 28 additions & 0 deletions openoa/utils/met_data_processing.py
@@ -70,6 +70,34 @@ def circular_mean(x: pd.DataFrame | pd.Series | np.ndarray, axis: int = 0):
)


@series_method(data_cols=["u", "v"])
def compute_wind_speed(
u: pd.Series | np.ndarray | str, v: pd.Series | np.ndarray | str, data: pd.DataFrame = None
) -> pd.Series | np.ndarray:
"""Compute the wind speed from the u and v components.

Note:
Vector-averaged u and v wind speed components will generally result in lower wind speed
magnitudes than averages of the wind speed magnitude over the same time period. This becomes
more severe as the averaging period increases. Therefore, the wind speed magnitude computed
using this function may be lower than expected when using data with low temporal resolution.

Args:
u(:obj:`pandas.Series` | :obj:`numpy.ndarray` | :obj:`str`): A pandas Series, a numpy
array, or the :obj:`str` name of the column in :py:attr:`data` containing the
u-component of the wind speed, in m/s.
v(:obj:`pandas.Series` | :obj:`numpy.ndarray` | :obj:`str`): A pandas Series, a numpy
array, or the :obj:`str` name of the column in :py:attr:`data` containing the
v-component of the wind speed, in m/s.
data(:obj:`pandas.DataFrame`, optional): The pandas ``DataFrame`` containing the columns
:py:attr:`u` and :py:attr:`v` when they are passed as column names. Defaults to ``None``.

Returns:
:obj:`pandas.Series` | :obj:`numpy.ndarray`: wind speed, in m/s.
"""
return np.sqrt(u**2 + v**2)


@series_method(data_cols=["u", "v"])
def compute_wind_direction(
u: pd.Series | str, v: pd.Series | str, data: pd.DataFrame = None
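The Note in the new `compute_wind_speed` docstring is worth a numeric sanity check. A short synthetic example (all values invented): computing the speed from time-averaged u/v components gives a smaller value than averaging the instantaneous speeds, and the gap grows as the wind direction wanders more.

```python
# Synthetic 10-minute wind components illustrating the vector-averaging caveat
# described in the compute_wind_speed docstring above.
import numpy as np
from openoa.utils.met_data_processing import compute_wind_speed

rng = np.random.default_rng(1)
direction = np.deg2rad(rng.normal(270.0, 40.0, size=144))  # wandering wind direction
speed = rng.normal(8.0, 1.0, size=144)                     # magnitudes near 8 m/s
u, v = speed * np.cos(direction), speed * np.sin(direction)

mean_of_speeds = compute_wind_speed(u, v).mean()                               # close to 8 m/s
speed_of_means = compute_wind_speed(np.array([u.mean()]), np.array([v.mean()]))[0]  # noticeably lower
print(mean_of_speeds, speed_of_means)
```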