Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CDAT Migration Phase 2: Refactor diurnal_cycle set #819

Merged
merged 26 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
386560b
Refactor driver
tomvothecoder Jun 13, 2024
3a6e07f
Add initial changes to `diurnal_cycle.py`
tomvothecoder Jun 13, 2024
352a781
Add test setup
tomvothecoder Jun 20, 2024
9b6e969
Add fix for land and sea mask and initial diurnal cycle changes
tomvothecoder Jun 20, 2024
dd18c00
Fix subsetting on region with diff lon axis orientation
tomvothecoder Jun 21, 2024
7015390
Additional refactoring work
tomvothecoder Jun 21, 2024
fa0f88e
Update plotter variable and func names and docstrings
tomvothecoder Jun 21, 2024
d148439
Fix writing variables to netcdf
tomvothecoder Jun 21, 2024
4262a22
Fix writing maxtime as phase and logger for start time format
tomvothecoder Jun 21, 2024
6fa191c
Add latest regression test updates
tomvothecoder Jun 27, 2024
a7f3246
Add final regression test
tomvothecoder Jun 27, 2024
18e5af1
Fix TypeError with | annotation
tomvothecoder Jun 27, 2024
faaae7c
Revert `diurnal_cycle.py` changes
tomvothecoder Jun 27, 2024
4d9b5f6
Remove `test.py`
tomvothecoder Jun 27, 2024
64a6fef
Update `diurnal_cycle_xr.py` docstrings
tomvothecoder Jul 1, 2024
be4bcbd
Update e3sm_diags/driver/utils/diurnal_cycle_xr.py
tomvothecoder Jul 1, 2024
ef27211
Rename function
tomvothecoder Jul 1, 2024
a07520c
Remove unused function
tomvothecoder Jul 1, 2024
a72a519
Apply suggestions from code review
tomvothecoder Jul 1, 2024
2c87552
Apply suggestions from code review
tomvothecoder Jul 1, 2024
2f37294
Fix long_name attr not saving to viewer and add png notebook
tomvothecoder Jul 2, 2024
b42ce79
Fix utility for getting diff of pngs
tomvothecoder Jul 3, 2024
3223a54
Fix x axis ticks
tomvothecoder Jul 15, 2024
be2f813
Fix plot not being centered on longitude axis
tomvothecoder Jul 22, 2024
7c452b7
Apply suggestions from code review
tomvothecoder Jul 22, 2024
edea2bd
Fix pre-commit issue
tomvothecoder Jul 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions auxiliary_tools/cdat_regression_testing/666-diurnal-cycle/run.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[#]
sets = ["diurnal_cycle"]
case_id = "TRMM-3B43v-7_3hr"
variables = ["PRECT"]
ref_name = "TRMM-3B43v-7_3hr"
# seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
seasons = ["ANN"]
# regions = ["CONUS", "20S20N", "W_Pacific", "Amazon","global","50S50N"]
regions = ["20S20N"]
reference_name = "TRMM-3B43v-7"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# %%
# Regression-test run script for the "diurnal_cycle" set. Run it as a module:
# python -m auxiliary_tools.cdat_regression_testing.666-diurnal-cycle.run_script
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

# Arguments forwarded positionally to `run_set` below.
# NOTE(review): SET_DIR presumably names this script's directory under
# `cdat_regression_testing` -- confirm against `base_run_script.run_set`.
SET_NAME = "diurnal_cycle"
SET_DIR = "666-diurnal-cycle"
# No cfg file is passed by default. Swap in the commented-out assignment below
# to point at a specific `run.cfg` (the path shown is user-specific).
CFG_PATH: str | None = None
# CFG_PATH: str | None = "/global/u2/v/vo13/E3SM-Project/e3sm_diags/auxiliary_tools/cdat_regression_testing/666-diurnal-cycle/run.cfg"
MULTIPROCESSING = True

# %%
run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)

Large diffs are not rendered by default.

29 changes: 24 additions & 5 deletions auxiliary_tools/cdat_regression_testing/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import math
import os
from typing import List

import pandas as pd
Expand Down Expand Up @@ -163,7 +164,7 @@ def get_num_metrics_above_diff_thres(
)


def get_image_diffs(actual_path: str, expected_path: str):
def get_image_diffs(actual_path: str, expected_path: str) -> str | None:
"""Get the diffs between two images.

This function is useful for comparing two datasets that can't be compared
Expand All @@ -183,10 +184,28 @@ def get_image_diffs(actual_path: str, expected_path: str):
expected_png = Image.open(expected_path).convert("RGB")

diff = ImageChops.difference(actual_png, expected_png)

draw = ImageDraw.Draw(diff)
(left, upper, right, lower) = diff.getbbox()
draw.rectangle(((left, upper), (right, lower)), outline="red")

diff_path = actual_path.replace("actual", "diff")
try:
(left, upper, right, lower) = diff.getbbox()
except TypeError as e:
if "cannot unpack non-iterable NoneType object" in str(e):
print(" * Plots are identical")

return None
else:
draw.rectangle(((left, upper), (right, lower)), outline="red")

# Create the diff directory.
split_actual_path = actual_path.split("/")
split_actual_path[-2] = f"{split_actual_path[-2]}_diff"
actual_dir_path = ("/").join(split_actual_path[0:-1])
os.makedirs(actual_dir_path, exist_ok=True)

# Save the png file to the diff directory.
diff_path = ("/").join(split_actual_path)
diff.save(diff_path)

print(f" * Difference path {diff_path}")

return diff_path
160 changes: 88 additions & 72 deletions e3sm_diags/driver/diurnal_cycle_driver.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Literal

import cdms2
import xarray as xr

import e3sm_diags
from e3sm_diags.driver import utils
from e3sm_diags.driver.utils.dataset_xr import Dataset
from e3sm_diags.driver.utils.diurnal_cycle_xr import composite_diurnal_cycle
from e3sm_diags.driver.utils.io import _get_output_filename_filepath
from e3sm_diags.driver.utils.regrid import _apply_land_sea_mask, _subset_on_region
from e3sm_diags.logger import custom_logger
from e3sm_diags.plot import plot
from e3sm_diags.plot.diurnal_cycle_plot import plot as plot_func

logger = custom_logger(__name__)

Expand All @@ -22,94 +23,109 @@ def run_diag(parameter: DiurnalCycleParameter) -> DiurnalCycleParameter:
ref_name = getattr(parameter, "ref_name", "")
regions = parameter.regions

test_data = utils.dataset.Dataset(parameter, test=True)
ref_data = utils.dataset.Dataset(parameter, ref=True)

for season in seasons:
# Get the name of the data, appended with the years averaged.
parameter.test_name_yrs = utils.general.get_name_and_yrs(
parameter, test_data, season
)
parameter.ref_name_yrs = utils.general.get_name_and_yrs(
parameter, ref_data, season
)

# Get land/ocean fraction for masking.
try:
land_frac = test_data.get_climo_variable("LANDFRAC", season)
ocean_frac = test_data.get_climo_variable("OCNFRAC", season)
except Exception:
mask_path = os.path.join(
e3sm_diags.INSTALL_PATH, "acme_ne30_ocean_land_mask.nc"
)
with cdms2.open(mask_path) as f:
land_frac = f("LANDFRAC")
ocean_frac = f("OCNFRAC")

for var in variables:
logger.info("Variable: {}".format(var))
test = test_data.get_climo_variable(var, season)
ref = ref_data.get_climo_variable(var, season)

parameter.var_id = var
parameter.viewer_descr[var] = (
test.long_name
if hasattr(test, "long_name")
else "No long_name attr in test data."
)
test_ds = Dataset(parameter, data_type="test")
ref_ds = Dataset(parameter, data_type="ref")

for var_key in variables:
logger.info("Variable: {}".format(var_key))
parameter.var_id = var_key

for season in seasons:
parameter._set_name_yrs_attrs(test_ds, ref_ds, season)

ds_land_sea_mask: xr.Dataset = test_ds._get_land_sea_mask(season)

ds_test = test_ds.get_climo_dataset(var_key, season)
ds_ref = ref_ds.get_climo_dataset(var_key, season)

for region in regions:
test_domain = utils.general.select_region(
region, test, land_frac, ocean_frac, parameter
)
ref_domain = utils.general.select_region(
region, ref, land_frac, ocean_frac, parameter
if "land" in region or "ocean" in region:
test_domain = _apply_land_sea_mask(
ds_test,
ds_land_sea_mask,
var_key,
region, # type: ignore
parameter.regrid_tool,
parameter.regrid_method,
)

ref_domain = _apply_land_sea_mask(
ds_ref,
ds_land_sea_mask,
var_key,
region, # type: ignore
parameter.regrid_tool,
parameter.regrid_method,
)
else:
test_domain = ds_test.copy()
ref_domain = ds_ref.copy()

test_domain = _subset_on_region(test_domain, var_key, region)
ref_domain = _subset_on_region(ref_domain, var_key, region)

parameter.viewer_descr[var_key] = ds_test[var_key].attrs.get(
"long_name", "No long_name attr in test data."
)

parameter.output_file = "-".join([ref_name, var, season, region])
parameter.output_file = "-".join([ref_name, var_key, season, region])
parameter.main_title = str(
" ".join([var, "Diurnal Cycle ", season, region])
" ".join([var_key, "Diurnal Cycle ", season, region])
)

(
test_cmean,
test_amplitude,
test_maxtime,
) = utils.diurnal_cycle.composite_diurnal_cycle(test_domain, season)
) = composite_diurnal_cycle(
test_domain, var_key, season
) # type: ignore
(
ref_cmean,
ref_amplitude,
ref_maxtime,
) = utils.diurnal_cycle.composite_diurnal_cycle(ref_domain, season)
) = composite_diurnal_cycle(
ref_domain, var_key, season
) # type: ignore

parameter.var_region = region
plot(
parameter.current_set,

plot_func(
test_maxtime,
test_amplitude,
ref_maxtime,
ref_amplitude,
parameter,
)
utils.general.save_ncfiles(
parameter.current_set,
test_cmean,
ref_cmean,
None,
parameter,
)
utils.general.save_ncfiles(
parameter.current_set,
test_amplitude,
ref_amplitude,
None,
parameter,

ds_out_test = xr.Dataset(
data_vars={
test_cmean.name: test_cmean,
test_amplitude.name: test_amplitude,
test_maxtime.name: test_maxtime,
}
)
utils.general.save_ncfiles(
parameter.current_set,
test_maxtime,
ref_maxtime,
None,
parameter,
ds_out_ref = xr.Dataset(
data_vars={
ref_cmean.name: ref_cmean,
ref_amplitude.name: ref_amplitude,
ref_maxtime.name: ref_maxtime,
}
)

_write_vars_to_netcdf(parameter, var_key, ds_out_test, "test")
_write_vars_to_netcdf(parameter, var_key, ds_out_ref, "ref")

return parameter


def _write_vars_to_netcdf(
    parameter: DiurnalCycleParameter,
    var_key: str,
    ds: xr.Dataset,
    data_type: Literal["test", "ref"],
):
    """Write a dataset of output variables to a netCDF file and log the path.

    Parameters
    ----------
    parameter : DiurnalCycleParameter
        The parameter object, passed to ``_get_output_filename_filepath`` to
        resolve the output file path.
    var_key : str
        The variable key; used only in the log message.
    ds : xr.Dataset
        The dataset to write.
    data_type : Literal["test", "ref"]
        Which dataset is being written; used to resolve the output path and in
        the log message.
    """
    # Only the second element (the file path) of the returned pair is needed.
    _unused, output_path = _get_output_filename_filepath(parameter, data_type)

    ds.to_netcdf(output_path)

    logger.info(f"'{var_key}' {data_type} variable output saved in: {output_path}")
Loading
Loading