Skip to content

Commit

Permalink
added check if data/files already exist
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentinGebhart committed Sep 26, 2024
1 parent b266106 commit aae38e8
Show file tree
Hide file tree
Showing 2 changed files with 211 additions and 268 deletions.
136 changes: 78 additions & 58 deletions climada_petals/hazard/copernicus_forecast/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,20 +254,13 @@ def _download_multvar_multlead(
Returns:
None
"""
area_str = f'{int(area[1])}_{int(area[0])}_{int(area[2])}_{int(area[3])}'
download_file = f'{filename}'

# check if data already exists including all relevant data variables
data_already_exists = os.path.isfile(f'{download_file}')
if data_already_exists and format == 'grib':
existing_variables = list(xr.open_dataset(
download_file, engine='cfgrib', decode_cf=False, chunks={}
).data_vars)
vars_short = [indicator.VAR_SPECS[var]['short_name'] for var in vars]
data_already_exists = set(vars_short).issubset(existing_variables)

download_file = f'{filename}'
data_already_exists = self._is_data_present(f'{download_file}', 'grib', vars)

if data_already_exists and not overwrite:
self.logger.info(f'Corresponding {format} file {download_file} already exists.')

Check warning on line 262 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

logging-fstring-interpolation

NORMAL: Use lazy % formatting in logging functions
Raw output
no description found

else:
try:
c = cdsapi.Client(url=self.url, key=self.key)

Check warning on line 266 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

invalid-name

LOW: Variable name "c" doesn't conform to '(([a-z][a-z0-9_]{2,30})|(_[a-z0-9_]*))$' pattern
Raw output
Used when the name doesn't match the regular expression associated to its type(constant, variable, class...).
Expand Down Expand Up @@ -366,12 +359,10 @@ def _process_data(self, data_out, year_list, month_list, bounds, overwrite, tf_i
download_file = f"{data_out}/{format}/{year}/{month:02d}/{index_params['filename_lead']}_{area_str}_{year}{month:02d}.{file_extension}"

Check warning on line 359 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (151/100)
Raw output
Used when a line is longer than a given number of characters.

Check warning on line 360 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
# check if data already exists including all relevant data variables
data_already_exists = os.path.exists(daily_file)
if data_already_exists and format == 'grib':
vars = indicator.get_index_params(tf_index)['variables']
vars_short = [indicator.VAR_SPECS[var]['short_name'] for var in vars]
existing_variables = list(xr.open_dataset(daily_file, decode_cf=False, chunks={}).data_vars)
data_already_exists = set(vars_short).issubset(existing_variables)
data_already_exists = self._is_data_present(
download_file, file_extension,
indicator.get_index_params(tf_index)['variables']
)

Check warning on line 366 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
if not data_already_exists or overwrite:
try:
Expand Down Expand Up @@ -438,42 +429,51 @@ def calculate_index(
# path to input file of daily variables
input_file_name = f"{data_out}/netcdf/daily/{year}/{month:02d}" \
f'/{index_params["filename_lead"]}_{area_str}_{year}{month:02d}.nc'
grib_file_path = f"{data_out}/grib/{year}/{month:02d}" \
grib_file_name = f"{data_out}/grib/{year}/{month:02d}" \
f"/{index_params['filename_lead']}_{area_str}_{year}{month:02d}.grib"

if tf_index in ["HIS", "HIA", "Tmean", "Tmax", "Tmin"]:
# Calculate heat indices like HIS, HIA, Tmean, Tmax, Tmin
ds_daily, ds_monthly, ds_stats = indicator.calculate_heat_indices(input_file_name, tf_index)

elif tf_index == "TR":
# Handle Tropical Nights (TR)
ds_daily, ds_monthly, ds_stats = indicator.calculate_and_save_tropical_nights_per_lag(grib_file_path, tf_index)

elif tf_index == "TX30":
# Handle Hot Days (Tmax > 30°C)
ds_daily, ds_monthly, ds_stats = indicator.calculate_and_save_tx30_per_lag(grib_file_path, tf_index)

Check warning on line 434 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
# TODO: add functionality
# elif tf_index == "HW":
# Handle Heat Wave Days (3 consecutive days Tmax > threshold)
# calculate_and_save_heat_wave_days_per_lag(data_out, year_list, month_list, tf_index, area_selection)

else:
logging.error(f"Index {tf_index} is not implemented. Supported indices are 'HIS', 'HIA', 'Tmean', 'Tmax', 'Tmin', 'HotDays', 'TR', and 'HW'.")

# paths to output files
output_dir = f"{data_out}/{tf_index}/{year}/{month:02d}"
output_daily_path = f'{output_dir}/daily_{tf_index}_{area_str}_{year}{month:02d}.nc'
output_stats_path = f'{output_dir}/stats/stats_{tf_index}_{area_str}_{year}{month:02d}.nc'

Check warning on line 438 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (106/100)
Raw output
Used when a line is longer than a given number of characters.
output_monthly_path = f'{output_dir}/{tf_index}_{area_str}_{year}{month:02d}.nc'
os.makedirs(os.path.dirname(output_stats_path), exist_ok=True)

if tf_index in ["HIS", "HIA", "Tmean", "Tmax", "Tmin"]:
ds_daily.to_netcdf(output_daily_path)
ds_monthly.to_netcdf(output_monthly_path)
ds_stats.to_netcdf(output_stats_path)

def save_index_to_hazard(self, year_list, month_list, area_selection, data_out, tf_index):
# check if index (monthly) file exists
if os.path.exists(output_monthly_path) and not overwrite:
self.logger.info(

Check warning on line 444 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

logging-fstring-interpolation

NORMAL: Use lazy % formatting in logging functions
Raw output
no description found
f'Index file {tf_index}_{area_str}_{year}{month:02d}.nc already exists.'
)

else:
if tf_index in ["HIS", "HIA", "Tmean", "Tmax", "Tmin"]:
# Calculate heat indices like HIS, HIA, Tmean, Tmax, Tmin
ds_daily, ds_monthly, ds_stats = indicator.calculate_heat_indices(input_file_name, tf_index)

Check warning on line 451 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (116/100)
Raw output
Used when a line is longer than a given number of characters.

elif tf_index == "TR":
# Handle Tropical Nights (TR)
ds_daily, ds_monthly, ds_stats = indicator.calculate_and_save_tropical_nights_per_lag(grib_file_name, tf_index)

Check warning on line 455 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (135/100)
Raw output
Used when a line is longer than a given number of characters.

elif tf_index == "TX30":
# Handle Hot Days (Tmax > 30°C)
ds_daily, ds_monthly, ds_stats = indicator.calculate_and_save_tx30_per_lag(grib_file_name, tf_index)

Check warning on line 459 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (124/100)
Raw output
Used when a line is longer than a given number of characters.

Check warning on line 460 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
# TODO: add functionality

Check warning on line 461 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

fixme

NORMAL: TODO: add functionality
Raw output
no description found
# elif tf_index == "HW":
# Handle Heat Wave Days (3 consecutive days Tmax > threshold)
# calculate_and_save_heat_wave_days_per_lag(data_out, year_list, month_list, tf_index, area_selection)

Check warning on line 464 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (126/100)
Raw output
Used when a line is longer than a given number of characters.

else:
logging.error(f"Index {tf_index} is not implemented. Supported indices are 'HIS', 'HIA', 'Tmean', 'Tmax', 'Tmin', 'HotDays', 'TR', and 'HW'.")

Check warning on line 467 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (166/100)
Raw output
Used when a line is longer than a given number of characters.

Check warning on line 467 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

logging-fstring-interpolation

NORMAL: Use lazy % formatting in logging functions
Raw output
no description found

# save files
self.logger.info(f"Writing index data to {output_monthly_path}.")

Check warning on line 470 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

logging-fstring-interpolation

NORMAL: Use lazy % formatting in logging functions
Raw output
no description found
if tf_index in ["HIS", "HIA", "Tmean", "Tmax", "Tmin"]:
ds_daily.to_netcdf(output_daily_path)
ds_monthly.to_netcdf(output_monthly_path)
ds_stats.to_netcdf(output_stats_path)

def save_index_to_hazard(self, year_list, month_list, area_selection, data_out, overwrite, tf_index):

Check warning on line 476 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (105/100)
Raw output
Used when a line is longer than a given number of characters.

Check warning on line 476 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

too-many-locals

LOW: Too many local variables (31/15)
Raw output
Used when a function or method has too many local variables.
"""
Processes the calculated climate indices into hazard objects and saves them.
Expand All @@ -496,6 +496,7 @@ def save_index_to_hazard(self, year_list, month_list, area_selection, data_out,

for year in year_list:
for month in month_list:

month_str = f"{month:02d}"
current_input_dir = f'{base_input_dir}/{year}/{month:02d}'
nc_file_pattern = f"{hazard_type}_{area_str}_{year}{month_str}.nc"
Expand All @@ -511,21 +512,24 @@ def save_index_to_hazard(self, year_list, month_list, area_selection, data_out,
ensemble_members = ds["number"].values

for member in ensemble_members:
ds_subset = ds.sel(number=member)

hazard = Hazard.from_xarray_raster(
data=ds_subset,
hazard_type=hazard_type,
intensity_unit=intensity_unit,
intensity=intensity_variable,
coordinate_vars={"event": "step", "longitude": "longitude", "latitude": "latitude"}
)

hazard.check()

# check if data already exists
filename = f"hazard_{hazard_type}_member_{member}_{area_str}_{year}{month_str}.hdf5"

Check warning on line 516 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (108/100)
Raw output
Used when a line is longer than a given number of characters.
file_path = os.path.join(current_output_dir, filename)
hazard.write_hdf5(file_path)
if os.path.exists(file_path) and not overwrite:
self.logger.info(f'Index file ' \

Check warning on line 519 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

logging-fstring-interpolation

NORMAL: Use lazy % formatting in logging functions
Raw output
no description found
f'{tf_index}_{area_str}_{year}{month:02d}.nc already exists.')
else:
ds_subset = ds.sel(number=member)
hazard = Hazard.from_xarray_raster(
data=ds_subset,
hazard_type=hazard_type,
intensity_unit=intensity_unit,
intensity=intensity_variable,
coordinate_vars={"event": "step", "longitude": "longitude", "latitude": "latitude"}

Check warning on line 528 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (115/100)
Raw output
Used when a line is longer than a given number of characters.
)

hazard.check()
hazard.write_hdf5(file_path)

print(f"Completed processing for {year}-{month_str}. Data saved in {current_output_dir}")

Check warning on line 534 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

line-too-long

LOW: Line too long (109/100)
Raw output
Used when a line is longer than a given number of characters.

Expand All @@ -537,4 +541,20 @@ def save_index_to_hazard(self, year_list, month_list, area_selection, data_out,
# print final hazard
last_hazard_file = file_path
hazard_obj = Hazard.from_hdf5(last_hazard_file)
hazard_obj.plot_intensity(1, smooth=False)
hazard_obj.plot_intensity(1, smooth=False)

Check warning on line 545 in climada_petals/hazard/copernicus_forecast/handler.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
@staticmethod
def _is_data_present(file, format, vars):
    """Check whether a data file exists and contains all requested variables.

    Parameters
    ----------
    file : str
        Path of the data file to check.
    format : str
        File format. ``'grib'`` selects the ``cfgrib`` engine; for any other
        value the engine is left as ``None`` so xarray picks one itself.
    vars : list of str
        Variable names, used as keys into ``indicator.VAR_SPECS`` to look up
        the short names expected in the file.

    Returns
    -------
    bool
        ``False`` if ``file`` is not an existing regular file. Otherwise
        ``True`` only if every requested short name is among the data
        variables of the dataset.
    """
    # NOTE: `format` and `vars` shadow built-ins (pylint redefined-builtin);
    # the names are kept so that existing keyword callers keep working.
    if not os.path.isfile(file):
        return False

    engine = 'cfgrib' if format == 'grib' else None

    # Open the dataset as a context manager so the underlying file handle is
    # released as soon as the variable names have been read.
    with xr.open_dataset(file, engine=engine, decode_cf=False, chunks={}) as dataset:
        existing_variables = set(dataset.data_vars)

    vars_short = {indicator.VAR_SPECS[var]['short_name'] for var in vars}
    return vars_short.issubset(existing_variables)

Loading

0 comments on commit aae38e8

Please sign in to comment.