diff --git a/CHANGELOG.md b/CHANGELOG.md index c10559a0..a4dd5a58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,22 @@ # Changelog All notable changes to this project will be documented in this file. If you make a notable change to the project, please add a line describing the change to the "unreleased" section. The maintainers will make an effort to keep the [Github Releases](https://github.com/NREL/OpenOA/releases) page up to date with this changelog. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## Unreleased + +- Features and updates: + - `MonteCarloAEP` updates + - Add an `n_jobs` input to the Monte Carlo AEP method to allow for the underlying models to be + parallelized during each iteration for faster ML model computation. + - Add an `apply_iav` input to the Monte Carlo AEP analysis method to toggle the addition of the + IAV factor at the end of the analysis. + - Add a `progress_bar` flag to `MonteCarloAEP.run()` to allow for turning on or off the + simulation's default progress bar. + - Implement missing `compute_wind_speed` in `openoa/utils/met_data_processing.py` and apply it to + the `PlantData` reanalysis validation steps in place of the manual calculation. +- Fixes: + - Add a default value for `PlantData`'s `asset_distance_matrix` and `asset_direction_matrix` to + ensure projects not utilizing location data are compatible. + ## v3.1.3 - 2025-01-31 - Pin SciPy to >= 1.7 and <1.14 to avoid an incompatibility error with PyGAM. 
diff --git a/openoa/analysis/aep.py b/openoa/analysis/aep.py index 28cc8f2d..e0e3236a 100644 --- a/openoa/analysis/aep.py +++ b/openoa/analysis/aep.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys import random import datetime from copy import deepcopy @@ -10,8 +11,8 @@ import numpy.typing as npt import statsmodels.api as sm import matplotlib.pyplot as plt -from tqdm import tqdm from attrs import field, define +from tqdm.auto import tqdm, trange from sklearn.metrics import r2_score, mean_squared_error from matplotlib.markers import MarkerStyle from sklearn.linear_model import LinearRegression @@ -115,6 +116,14 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin): points. Defaults to "lin". ml_setup_kwargs(:obj:`kwargs`): Keyword arguments to :py:class:`openoa.utils.machine_learning_setup.MachineLearningSetup` class. Defaults to {}. + n_jobs(:obj:`int` | :obj:`None`): The number of jobs to use for the computation in the scikit-learn model. + This will only provide speedup in case of sufficiently large problems. ``None`` means 1 + unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. + apply_iav(:obj:`bool`): Toggles the application of the interannual variability at the end + of the simulation. If ``True``, then it is applied, and not if ``False``. Inclusion of + the IAV adjustment is useful for comparing against short-term estimates of energy + production, whereas the exclusion of the IAV is useful for comparing against long-term + energy production estimates. Defaults to ``True``. 
""" plant: PlantData = field(converter=deepcopy, validator=attrs.validators.instance_of(PlantData)) @@ -169,6 +178,10 @@ class MonteCarloAEP(FromDictMixin, ResetValuesMixin): default="lin", converter=str, validator=attrs.validators.in_(("lin", "gbm", "etr", "gam")) ) ml_setup_kwargs: dict = field(default={}, converter=dict) + n_jobs: int | None = field( + default=None, validator=attrs.validators.instance_of((int, type(None))) + ) + apply_iav: bool = field(default=True, validator=attrs.validators.instance_of(bool)) # Internally created attributes need to be given a type before usage resample_freq: str = field(init=False) @@ -285,6 +298,7 @@ def run( time_resolution: str = None, end_date_lt: str | pd.Timestamp | None = None, ml_setup_kwargs: dict = None, + progress_bar: bool = True, ) -> None: """ Process all appropriate data and run the MonteCarlo AEP analysis. @@ -324,6 +338,8 @@ def run( points. Defaults to "lin". ml_setup_kwargs(:obj:`kwargs`): Keyword arguments to :py:class:`openoa.utils.machine_learning_setup.MachineLearningSetup` class. Defaults to {}. + progress_bar(:obj:`bool`): Flag to use a progress bar for the iterations in the AEP + calculation. Defaults to ``True``. Returns: None @@ -382,7 +398,7 @@ def run( # Start the computation self.calculate_long_term_losses() self.setup_monte_carlo_inputs() - self.results = self.run_AEP_monte_carlo() + self.results = self.run_AEP_monte_carlo(progress_bar=progress_bar) # Log the completion of the run logger.info("Run completed") @@ -749,6 +765,10 @@ def filter_outliers(self, n): & (~df["nan_flag"]), :, ] + if df_sub.size == 0: + raise ValueError( + "The `uncertainty_loss_max` is too low for the data or there are too many NaN values." + ) # Set maximum range for using bin-filter, convert from MW to GWh plant_capac = self.plant.metadata.capacity / 1000.0 * self.resample_hours @@ -917,7 +937,9 @@ def run_regression(self, n): # Run regression. 
Note, the last column of reg_data is the target variable for the regression # Linear regression if self.reg_model == "lin": - reg = LinearRegression().fit(np.array(reg_data[:, 0:-1]), reg_data[:, -1]) + reg = LinearRegression(n_jobs=self.n_jobs).fit( + np.array(reg_data[:, 0:-1]), reg_data[:, -1] + ) predicted_y = reg.predict(np.array(reg_data[:, 0:-1])) self._mc_slope[n, :] = reg.coef_ @@ -946,6 +968,7 @@ def run_regression(self, n): report=False, cv=KFold(n_splits=5), verbose=verbosity, + n_jobs=self.n_jobs, ) # Store optimized hyperparameters for each reanalysis product self.opt_model[(self._run.reanalysis_product)] = ml.opt_model @@ -959,10 +982,14 @@ def run_regression(self, n): return self.opt_model[(self._run.reanalysis_product)] @logged_method_call - def run_AEP_monte_carlo(self): + def run_AEP_monte_carlo(self, progress_bar: bool = True): """ Loop through OA process a number of times and return array of AEP results each time + Args: + progress_bar(:obj:`bool`): Flag to use a progress bar for the iterations in the AEP + calculation. Defaults to ``True``. 
+ Returns: :obj:`numpy.ndarray` Array of AEP, long-term avail, long-term curtailment calculations """ @@ -991,7 +1018,8 @@ def run_AEP_monte_carlo(self): iav = np.empty(num_sim) # Loop through number of simulations, run regression each time, store AEP results - for n in tqdm(np.arange(num_sim)): + _range = trange(num_sim) if progress_bar else np.arange(num_sim) + for n in _range: self._run = self.mc_inputs.loc[n] # Run regression @@ -1075,9 +1103,10 @@ def run_AEP_monte_carlo(self): iav_avg = iav.mean() # Apply IAV to AEP from single MC iterations - iav_nsim = np.random.normal(1, iav_avg, self.num_sim) - aep_GWh = aep_GWh * iav_nsim - lt_por_ratio = lt_por_ratio * iav_nsim + if self.apply_iav: + iav_nsim = np.random.normal(1, iav_avg, self.num_sim) + aep_GWh = aep_GWh * iav_nsim + lt_por_ratio = lt_por_ratio * iav_nsim # Return final output sim_results = pd.DataFrame( @@ -1534,6 +1563,8 @@ def plot_aep_boxplot( __defaults_ml_setup_kwargs = MonteCarloAEP.__attrs_attrs__.ml_setup_kwargs.default __defaults_reg_temperature = MonteCarloAEP.__attrs_attrs__.reg_temperature.default __defaults_reg_wind_direction = MonteCarloAEP.__attrs_attrs__.reg_wind_direction.default +__defaults_n_jobs = MonteCarloAEP.__attrs_attrs__.n_jobs.default +__defaults_apply_iav = MonteCarloAEP.__attrs_attrs__.apply_iav.default def create_MonteCarloAEP( @@ -1552,6 +1583,8 @@ def create_MonteCarloAEP( ml_setup_kwargs: dict = __defaults_ml_setup_kwargs, reg_temperature: bool = __defaults_reg_temperature, reg_wind_direction: bool = __defaults_reg_wind_direction, + n_jobs: int | None = __defaults_n_jobs, + apply_iav: bool = __defaults_apply_iav, ) -> MonteCarloAEP: return MonteCarloAEP( plant=project, @@ -1569,6 +1602,8 @@ def create_MonteCarloAEP( ml_setup_kwargs=ml_setup_kwargs, reg_temperature=reg_temperature, reg_wind_direction=reg_wind_direction, + n_jobs=n_jobs, + apply_iav=apply_iav, ) diff --git a/openoa/plant.py b/openoa/plant.py index 2cdc4e11..d76d3cc1 100644 --- a/openoa/plant.py +++ 
b/openoa/plant.py @@ -443,8 +443,8 @@ class PlantData: default={"missing": {}, "dtype": {}, "frequency": {}, "attributes": []}, init=False ) eia: dict = field(default={}, init=False) - asset_distance_matrix: pd.DataFrame = field(init=False) - asset_direction_matrix: pd.DataFrame = field(init=False) + asset_distance_matrix: pd.DataFrame = field(init=False, default=pd.DataFrame([])) + asset_direction_matrix: pd.DataFrame = field(init=False, default=pd.DataFrame([])) def __attrs_post_init__(self): """Post-initialization hook.""" @@ -1043,7 +1043,7 @@ def _calculate_reanalysis_columns(self) -> None: ws = col_map["WMETR_HorWdSpd"] if ws not in df and has_u_v: - df[ws] = np.sqrt(df[u].values ** 2 + df[v].values ** 2) + df[ws] = met.compute_wind_speed(df[u], df[v]).values wd = col_map["WMETR_HorWdDir"] if wd not in df and has_u_v: diff --git a/openoa/utils/machine_learning_setup.py b/openoa/utils/machine_learning_setup.py index 96e8e255..7175308d 100644 --- a/openoa/utils/machine_learning_setup.py +++ b/openoa/utils/machine_learning_setup.py @@ -167,6 +167,7 @@ def hyper_optimize( n_iter_search: int = 20, report: bool = True, verbose: int = 0, + n_jobs: int | None = None, ) -> None: """ Optimize hyperparameters through cross-validation @@ -186,6 +187,9 @@ def hyper_optimize( - >1 : the computation time for each fold and parameter candidate is displayed; - >2 : the score is also displayed; - >3 : the fold and candidate parameter indexes are also displayed together with the starting time of the computation. + n_jobs(:obj:`int`): The number of jobs to use for the computation in the scikit-learn model. + This will only provide speedup in case of sufficiently large problems. ``None`` means 1 + unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. 
Returns: (none) @@ -199,6 +203,7 @@ def hyper_optimize( scoring=self.my_scorer, verbose=0, return_train_score=True, + n_jobs=n_jobs, ) # Fit the model to each combination of hyperparmeters self.random_search.fit(X, y) diff --git a/openoa/utils/met_data_processing.py b/openoa/utils/met_data_processing.py index 767dffbc..8f5ee5f8 100644 --- a/openoa/utils/met_data_processing.py +++ b/openoa/utils/met_data_processing.py @@ -70,6 +70,34 @@ def circular_mean(x: pd.DataFrame | pd.Series | np.ndarray, axis: int = 0): ) +@series_method(data_cols=["u", "v"]) +def compute_wind_speed( + u: pd.Series | np.ndarray | str, v: pd.Series | np.ndarray | str, data: pd.DataFrame = None +) -> pd.Series | np.ndarray: + """Compute the wind speed from the u and v components. + + Note: + Vector-averaged u and v wind speed components will generally result in lower wind speed + magnitudes than averages of the wind speed magnitude over the same time period. This becomes + more severe as the averaging period increases. Therefore, the wind speed magnitude computed + using this function may be lower than expected when using data with low temporal resolution. + + Args: + u(:obj:`pandas.Series` | :obj:`numpy.ndarray` | :obj:`str`): A pandas DataFrame or Series, + a numpy array, or the :obj:`str` name of the column in :py:attr:`data` containing the + u-component of the wind speed, in m/s. + v(:obj:`pandas.Series` | :obj:`numpy.ndarray` | :obj:`str`): A pandas DataFrame or Series, + a numpy array, or the :obj:`str` name of the column in :py:attr:`data` containing the + v-component of the wind speed, in m/s. + data(:obj:`pandas.DataFrame`): The pandas ``DataFrame`` containing the columns + :py:attr:`u` and :py:attr:`v`. + + Returns: + :obj:`pandas.Series` | :obj:`numpy.ndarray`: wind speed, in m/s. + """ + return np.sqrt(u**2 + v**2) + + @series_method(data_cols=["u", "v"]) def compute_wind_direction( u: pd.Series | str, v: pd.Series | str, data: pd.DataFrame = None