Skip to content

Commit

Permalink
Merge pull request #560 from monocongo/issue_535_dependency_updates
Browse files Browse the repository at this point in the history
Dependency version cleanups etc. for release of version 2.0.1
  • Loading branch information
monocongo authored Sep 19, 2024
2 parents 1478909 + b68457c commit 51174e3
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 58 deletions.
17 changes: 9 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ exclude = '''

[tool.poetry]
name = 'climate_indices'
version = '2.0.0'
version = '2.0.1'
description = 'Reference implementations of various climate indices typically used for drought monitoring'
authors = ['James Adams <monocongo@gmail.com>']
readme = 'README.md'
Expand All @@ -44,19 +44,21 @@ classifiers = [
packages = [{include = 'climate_indices', from = 'src'}]

[tool.poetry.dependencies]
cftime = '>=1.6.2'
dask = '>=2022.2.0'
h5netcdf = '>=1.1.0'
# only Python and scipy are required for the base library code
python = '>=3.8,<3.12'
scipy = '>=1.10.0'
scipy = '1.10.1'
# remaining dependencies are required for the CLI (console) scripts
cftime = '>=1.6.4'
dask = '>=2023.5.0'
h5netcdf = '>=1.1.0'
xarray = '>=2023.1.0'

[tool.poetry.dev-dependencies]
pytest = '*'
pytest = '8.3.3'
toml = '>=0.10.2'

[tool.poetry.group.dev.dependencies]
sphinx-autodoc-typehints = "^1.23.3"
sphinx-autodoc-typehints = '2.0.1'

[tool.poetry.scripts]
process_climate_indices = 'climate_indices.__main__:main'
Expand All @@ -66,4 +68,3 @@ spi = 'climate_indices.__spi__:main'
filterwarnings = [
'ignore::FutureWarning',
]

107 changes: 96 additions & 11 deletions src/climate_indices/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import logging
import multiprocessing
import os
from typing import Any, Dict, List
from typing import Any, Dict, List, Tuple

import numpy as np
import scipy.constants
Expand Down Expand Up @@ -563,7 +563,7 @@ def _drop_data_into_shared_arrays_grid(
var_names: list,
periodicity: compute.Periodicity,
data_start_year: int,
):
) -> Tuple[int, ...]:
output_shape = None

# get the data arrays we'll use later in the index computations
Expand Down Expand Up @@ -626,9 +626,16 @@ def _drop_data_into_shared_arrays_grid(


def _drop_data_into_shared_arrays_divisions(
dataset,
dataset: xr.Dataset,
var_names: List[str],
):
) -> Tuple[int, ...]:
"""
Drop data into shared arrays for use in the index computations.
:param dataset:
:param var_names:
:return:
"""
output_shape = None

# get the data arrays we'll use later in the index computations
Expand Down Expand Up @@ -874,7 +881,7 @@ def _compute_write_index(keyword_arguments):
# TODO once we support daily Palmers then we'll need to convert values
# from a 366-day calendar back into a normal/Gregorian calendar

# get the computedPDSI data as an array of float32 values
# get the computed PDSI data as an array of float32 values
array = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_ARRAY]
shape = _global_shared_arrays[_KEY_RESULT_PDSI][_KEY_SHAPE]
pdsi = np.frombuffer(array.get_obj()).reshape(shape).astype(float)
Expand Down Expand Up @@ -1579,6 +1586,42 @@ def main(): # type: () -> None

arguments = parser.parse_args()

process_climate_indices(arguments=arguments)

# report the elapsed time
end_datetime = datetime.now()
_logger.info("End time: %s", end_datetime)
elapsed = end_datetime - start_datetime
_logger.info("Elapsed time: %s", elapsed)

except Exception:
_logger.exception("Failed to complete", exc_info=True)
raise


def process_climate_indices(
arguments: argparse.Namespace,
) -> None:
"""
Process climate indices based on the provided arguments.
:param arguments: A dictionary or argparse.Namespace containing the arguments
:return: The results of the climate indices processing
"""
# Extract arguments
# index = args['index']
# periodicity = args['periodicity']
# scales = args['scales']
# calibration_start_year = args['calibration_start_year']
# calibration_end_year = args['calibration_end_year']
# netcdf_precip = args['netcdf_precip']
# var_name_precip = args['var_name_precip']
# output_file_base = args['output_file_base']

# Add your existing processing logic here
# ...

try:
# validate the arguments and determine the input type
input_type = _validate_args(arguments)

Expand Down Expand Up @@ -1740,16 +1783,12 @@ def main(): # type: () -> None
if netcdf_awc != arguments.netcdf_awc:
os.remove(netcdf_awc)

# report on the elapsed time
end_datetime = datetime.now()
_logger.info("End time: %s", end_datetime)
elapsed = end_datetime - start_datetime
_logger.info("Elapsed time: %s", elapsed)

except Exception:
_logger.exception("Failed to complete", exc_info=True)
raise

return None


if __name__ == "__main__":
# (please do not remove -- useful for running as a script when debugging)
Expand Down Expand Up @@ -1777,4 +1816,50 @@ def main(): # type: () -> None
# --calibration_start_year 1951 --calibration_end_year 2010
# --multiprocessing all --periodicity monthly

"""
SYNOPSIS:
The main program in the provided code excerpt is designed to process climate indices on NetCDF
gridded datasets in parallel, leveraging Python's multiprocessing module. The process can be
broken down into several key steps, which together implement a quasi "map-reduce" model for parallel
computation. Here's an overview of how it works:
Step 1: Initialization and Argument Parsing
The program starts by parsing command-line arguments that specify the details of the computation,
such as the index to compute (e.g., SPI, SPEI), the input NetCDF files, and various parameters
relevant to the computation. It then validates these arguments to ensure they form a coherent set
of instructions for the computation.
Step 2: Setting Up Multiprocessing
Based on the command-line arguments, the program determines the number of worker processes to use.
It can use all available CPUs minus one, a single process, or all CPUs, depending on the user's choice.
Global shared arrays are prepared for use by worker processes. These arrays hold the input data
(e.g., precipitation, temperature) and the results of the computations.
Step 3: Data Preparation
The input data from NetCDF files is loaded into shared memory arrays. This step involves reading the data,
possibly converting units, and then distributing it across shared arrays that worker processes can access.
The program checks the dimensions and shapes of the input data to ensure they match expected patterns,
adjusting as necessary to fit the computation requirements.
Step 4: Parallel Computation ("Map")
The program splits the computation into chunks that can be processed independently.
This is the "map" part of the "map-reduce" model.
Worker processes are spawned, each taking a portion of the data from the shared arrays
to compute the climate index (e.g., SPI, SPEI) over that subset.
Each worker applies the computation function along the specified axis of the data chunk it has been given.
This could involve complex calculations like the Thornthwaite method for PET or statistical analysis for SPI.
Step 5: Aggregating Results ("Reduce")
Once all worker processes complete their computations, the results are aggregated back into a single dataset. Summary
This is the "reduce" part of the "map-reduce" model.
The program collects the computed indices from the shared arrays and assembles them into a coherent
output dataset, maintaining the correct dimensions and metadata.
Step 6: Writing Output
The final step involves writing the computed indices back to NetCDF files.
Each index computed (e.g., SPI, SPEI, PET) is saved in its own file.
The program ensures that the output files contain all necessary metadata and are structured
correctly to be used in further analysis or visualization.
"""
main()
14 changes: 7 additions & 7 deletions src/climate_indices/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,23 +118,23 @@ def sum_to_scale(
) -> np.ndarray:
"""
Compute a sliding sums array using 1-D convolution. The initial
(scale - 1) elements of the result array will be padded with np.NaN values.
Missing values are not ignored, i.e. if a np.NaN
(scale - 1) elements of the result array will be padded with np.nan values.
Missing values are not ignored, i.e. if a np.nan
(missing) value is part of the group of values to be summed then the sum
will be np.NaN
will be np.nan
For example if the first array is [3, 4, 6, 2, 1, 3, 5, 8, 5] and
the number of values to sum is 3 then the resulting array
will be [np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18].
will be [np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18].
More generally:
Y = f(X, n)
Y[i] == np.NaN, where i < n
Y[i] == np.nan, where i < n
Y[i] == sum(X[i - n + 1:i + 1]), where i >= n - 1 and X[i - n + 1:i + 1]
contains no NaN values
Y[i] == np.NaN, where i >= n - 1 and X[i - n + 1:i + 1] contains
Y[i] == np.nan, where i >= n - 1 and X[i - n + 1:i + 1] contains
one or more NaN values
:param values: the array of values over which we'll compute sliding sums
Expand All @@ -158,7 +158,7 @@ def sum_to_scale(

# BELOW FOR dask/xarray DataArray integration
# # pad the values array with (scale - 1) NaNs
# values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.NaN)
# values = pad(values, pad_width=(scale - 1, 0), mode='constant', constant_values=np.nan)
#
# start = 1
# end = -(scale - 2)
Expand Down
2 changes: 1 addition & 1 deletion src/climate_indices/indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def percentage_of_normal(

# get an array containing a sliding sum on the specified time step
# scale -- i.e. if the scale is 3 then the first two elements will be
# np.NaN, since we need 3 elements to get a sum, and then from the third
# np.nan, since we need 3 elements to get a sum, and then from the third
# element to the end the values will equal the sum of the corresponding
# time step plus the values of the two previous time steps
scale_sums = compute.sum_to_scale(values, scale)
Expand Down
30 changes: 15 additions & 15 deletions tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_transform_fitted_gamma(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_gamma(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -172,9 +172,9 @@ def test_gamma_parameters(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
nan_alphas = np.full(shape=(12,), fill_value=np.NaN)
nan_betas = np.full(shape=(12,), fill_value=np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
nan_alphas = np.full(shape=(12,), fill_value=np.nan)
nan_betas = np.full(shape=(12,), fill_value=np.nan)
alphas, betas = compute.gamma_parameters(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -247,7 +247,7 @@ def test_transform_fitted_pearson(
"""

# confirm that an input array of all NaNs results in the same array returned
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_pearson(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -280,7 +280,7 @@ def test_transform_fitted_pearson(
)

# confirm that an input array of all NaNs will return the same array
all_nans = np.full(precips_mm_monthly.shape, np.NaN)
all_nans = np.full(precips_mm_monthly.shape, np.nan)
computed_values = compute.transform_fitted_pearson(
all_nans,
data_year_start_monthly,
Expand Down Expand Up @@ -524,44 +524,44 @@ def test_sum_to_scale():
# test an input array with no missing values
values = np.array([3.0, 4, 6, 2, 1, 3, 5, 8, 5])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
)
computed_values = compute.sum_to_scale(values, 4)
expected_values = np.array([np.NaN, np.NaN, np.NaN, 15, 13, 12, 11, 17, 21])
expected_values = np.array([np.nan, np.nan, np.nan, 15, 13, 12, 11, 17, 21])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg=UNEXPECTED_SLIDING_SUMS_MESSAGE,
)

# test an input array with missing values on the end
values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.NaN, np.NaN, np.NaN])
values = np.array([3, 4, 6, 2, 1, 3, 5, 8, 5, np.nan, np.nan, np.nan])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, 16, 18, np.NaN, np.NaN, np.NaN])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, 16, 18, np.nan, np.nan, np.nan])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg="Sliding sums not computed as expected when missing values appended to end of input array",
)

# test an input array with missing values within the array
values = np.array([3, 4, 6, 2, 1, 3, 5, np.NaN, 8, 5, 6])
values = np.array([3, 4, 6, 2, 1, 3, 5, np.nan, 8, 5, 6])
computed_values = compute.sum_to_scale(values, 3)
expected_values = np.array([np.NaN, np.NaN, 13, 12, 9, 6, 9, np.NaN, np.NaN, np.NaN, 19])
expected_values = np.array([np.nan, np.nan, 13, 12, 9, 6, 9, np.nan, np.nan, np.nan, 19])
np.testing.assert_allclose(
computed_values,
expected_values,
err_msg="Sliding sums not computed as expected when missing values appended to end of input array",
)

test_values = np.array([1.0, 5, 7, 2, 3, 4, 9, 6, 3, 8])
sum_by2 = np.array([np.NaN, 6, 12, 9, 5, 7, 13, 15, 9, 11])
sum_by4 = np.array([np.NaN, np.NaN, np.NaN, 15, 17, 16, 18, 22, 22, 26])
sum_by6 = np.array([np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22, 30, 31, 27, 33])
sum_by2 = np.array([np.nan, 6, 12, 9, 5, 7, 13, 15, 9, 11])
sum_by4 = np.array([np.nan, np.nan, np.nan, 15, 17, 16, 18, 22, 22, 26])
sum_by6 = np.array([np.nan, np.nan, np.nan, np.nan, np.nan, 22, 30, 31, 27, 33])
np.testing.assert_equal(
compute.sum_to_scale(test_values, 2),
sum_by2,
Expand Down
10 changes: 5 additions & 5 deletions tests/test_eto.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,19 @@ def test_eto_thornthwaite(temps_celsius, latitude_degrees, data_year_start_month
pytest.raises(TypeError, eto.eto_thornthwaite, temps_celsius, None, data_year_start_monthly)

# make sure that an invalid latitude value (NaN) raises an error
pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.NaN, data_year_start_monthly)
pytest.raises(ValueError, eto.eto_thornthwaite, temps_celsius, np.nan, data_year_start_monthly)


# ------------------------------------------------------------------------------
def test_sunset_hour_angle():
# make sure that an invalid latitude value raises an error
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(-100.0), np.deg2rad(0.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.NaN, np.deg2rad(0.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.nan, np.deg2rad(0.0))

# make sure that an invalid solar declination angle raises an error
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(-75.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.deg2rad(85.0))
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.NaN)
pytest.raises(ValueError, eto._sunset_hour_angle, np.deg2rad(0.0), np.nan)

expected_value = math.pi / 2
computed_value = eto._sunset_hour_angle(0.0, np.deg2rad(0.0))
Expand Down Expand Up @@ -113,7 +113,7 @@ def test_solar_declination():
pytest.raises(ValueError, eto._solar_declination, -1)
pytest.raises(ValueError, eto._solar_declination, 367)
pytest.raises(ValueError, eto._solar_declination, 5000)
pytest.raises(ValueError, eto._solar_declination, np.NaN)
pytest.raises(ValueError, eto._solar_declination, np.nan)

expected_value = -0.313551072399921
computed_value = eto._solar_declination(30)
Expand All @@ -130,7 +130,7 @@ def test_daylight_hours():
# make sure invalid arguments raise an error
pytest.raises(ValueError, eto._daylight_hours, math.pi + 1)
pytest.raises(ValueError, eto._daylight_hours, -1.0)
pytest.raises(ValueError, eto._daylight_hours, np.NaN)
pytest.raises(ValueError, eto._daylight_hours, np.nan)

expected_value = 7.999999999999999
computed_value = eto._daylight_hours(math.pi / 3)
Expand Down
Loading

0 comments on commit 51174e3

Please sign in to comment.