Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/echam streams #1240

Open
wants to merge 15 commits into
base: release
Choose a base branch
from
Open
9 changes: 5 additions & 4 deletions configs/components/echam/echam.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ compile_infos:
make -j `nproc --all`; make install -j `nproc --all`;
install_bins: ''
branch: esm-tools
git-repository: https://git.geomar.de/foci/src/echam.git
git-repository: https://git.geomar.de/foci/src/echam.git
6.3.05p2-foci_oasismct4:
branch: esm-tools-oasis3mct4
git-repository: https://git.geomar.de/foci/src/echam.git
Expand Down Expand Up @@ -183,6 +183,7 @@ adj_input_dir: "${input_dir}/${resolution}"
forcing_dir: "${input_dir}/${resolution}"
greenhouse_dir: "${pool_dir}"
namelist_dir: "${general.esm_namelist_dir}/echam/${version}/${scenario_type}"
has_namelist_streams: True # ECHAM defines its output stream file names in the namelist (mvstreamctl chapters)

switch: 1
value : "echam default"
Expand Down Expand Up @@ -585,7 +586,7 @@ choose_with_lco2_emis:
co2ctl:
lco2_emis: true # read carbon emissions; need carbon_emissions.nc in work
lco2_2perc: true # limit maximum co2 growth rate to 2% wrt previous time step
add_choose_scenario: # other loop order (1:add_forcing_files 2:add_choose_scenario) does not work
add_choose_scenario: # other loop order (1:add_forcing_files 2:add_choose_scenario) does not work
HIST:
add_forcing_files:
carbon_emissions: carbon_emissions_hist
Expand Down Expand Up @@ -617,7 +618,7 @@ choose_icb_code:
namelist.echam:
submodelctl:
licb: "${licb}"


choose_wiso_code:
True:
Expand Down Expand Up @@ -687,7 +688,7 @@ forcing_in_work:
sic: "unit.96"
sst: "unit.20"
# ok this is another crazy ECHAM6 speciality
# every year the background aerosol files for 1849 to 1851
# every year the background aerosol files for 1849 to 1851
# need to be linked to the same file for historical/scenario runs
# don't blame me (seb-wahl), blame the MAC-SP developers at MPI
# MAC-SP describes aerosol w.r.t. piControl conditions.
Expand Down
176 changes: 176 additions & 0 deletions src/esm_runscripts/echam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
This module provides functionality specific to the ECHAM component.

The functions included in this module are:

- ``append_namelist_dependent_sources``: Appends namelist dependent sources to the ECHAM
configuration.

These functions are used to manage and update the configuration settings for ECHAM,
particularly focusing on handling namelist files and updating output data sources
based on the configuration parameters.
"""

import f90nml
from loguru import logger


def _get_mvstream_tags_from_namelist(namelist):
    """
    Extract the ``filetag`` of every ``mvstreamctl`` chapter of a namelist.

    Parameters
    ----------
    namelist : str or f90nml.namelist.Namelist
        The path to the namelist file or an already parsed namelist object.

    Returns
    -------
    list of str
        The ``filetag`` values of the ``mvstreamctl`` chapters, in iteration
        order. Chapters that do not define a ``filetag`` are skipped.

    Raises
    ------
    FileNotFoundError
        If the namelist file specified by the path cannot be found. The error
        is logged and then re-raised.
    TypeError
        If the provided namelist cannot be converted to an
        f90nml.namelist.Namelist object. The error is logged and then
        re-raised.

    Examples
    --------
    Assuming you have a namelist file at ``/tmp/example/namelist.echam`` with the
    following contents:

    .. code-block:: fortran

        ! This is the "no output" variant of namelist.echam. It contains absolutely no
        ! output, and can be used as a template for testing.
        !
        ! Extended by mvstreamctl namelist block, as an example for the mvstream tags.
        !
        ! P. Gierz
        ! Alfred Wegener Institute, Helmholtz Centre for Polar and Marine Research
        ! July 2021
        !
        ! P. Gierz
        ! October 2024

        &runctl
            dt_start = 2285, 12, 31, 23, 52, 30
            dt_stop = 6699, 12, 31, 23, 52, 30
            putrerun = 12, 'months', 'last', 0
            lfractional_mask = .false.
            lresume = .true.
            out_datapath = './'
            out_expname = 'E280'
            rerun_filetype = 4
            delta_time = 450
            putdata = 1, 'months', 'last', 0
            nproma = 8
            lcouple = .true.
            getocean = 1, 'days', 'last', 0
            putocean = 1, 'days', 'last', 0
            lcouple_co2 = .true.
            default_output = .false.
        /

        &parctl
            nproca = 24
            nprocb = 24
        /

        &submodelctl
            lmethox = .true.
        /

        &submdiagctl
            vphysc_lpost = .false.
        /

        &radctl
            iaero = 3
            io3 = 4
            isolrad = 6
            ich4 = 3
            in2o = 3
            co2vmr = 284.3169860840e-06
            ch4vmr = 808.2490234375e-09
            n2ovmr = 273.0210571289e-09
            yr_perp = 1850
        /
        &mvstreamctl
            filetag = 'paul_custom'
            source = 'g3b'
            variables = 'temp2:mean>temp2=167'
            interval = 1, 'months', 'last', 0
        /

    The following code will extract the mvstream tags from the namelist:

    >>> namelist_path = "/tmp/example/namelist.echam"
    >>> tags = _get_mvstream_tags_from_namelist(namelist_path)
    >>> print(tags)
    ['paul_custom']
    """
    if not isinstance(namelist, f90nml.namelist.Namelist):
        try:
            namelist = f90nml.read(namelist)
        except FileNotFoundError:
            logger.error(f"Namelist specified by {namelist} could not be found")
            # Re-raise: without a parsed namelist the loop below would fail
            # with an unrelated AttributeError on the plain path string.
            raise
        except TypeError:
            logger.error(
                f"Could not convert {namelist} to f90nml.namelist.Namelist object."
            )
            raise

    mvstream_tags = []
    # NOTE(review): the chapters are walked and compared with ``==`` instead of
    # indexing ``namelist["mvstreamctl"]``, presumably so that every repeated
    # ``mvstreamctl`` chapter is visited -- confirm against f90nml's handling
    # of duplicated groups before simplifying this loop.
    for chapter, contents in namelist.items():
        if chapter == "mvstreamctl":
            tag = contents.get("filetag")
            if tag is not None:
                mvstream_tags.append(tag)
    return mvstream_tags


def append_namelist_dependent_sources(config):
    """
    Append namelist dependent sources to the ECHAM configuration.

    This function updates the ``outdata_sources`` in the ECHAM configuration
    based on the ``mvstreamctl`` chapters of ``namelist.echam`` and other
    configuration parameters.

    Parameters
    ----------
    config : dict
        The configuration dictionary containing general, ECHAM, and JSBACH settings.

    Notes
    -----
    - The function reads the namelist from the specified directory if not
      already loaded.
    - JSBACH output is also defined in the ``mvstreamctl`` chapters of
      ``namelist.echam`` (JSBACH is technically a submodule inside of ECHAM
      and has no output control in its own namelist), so tags listed in
      ``config["jsbach"]["streams"]`` are filtered out here.
    - Tags listed in ``echam.ignore_tags`` are filtered out *in addition to*
      the JSBACH streams; declaring ``ignore_tags`` does not re-enable the
      JSBACH streams for ECHAM.
    - The output file type is checked, and if it is NetCDF (indicated by
      ``out_filetype`` == 2), the file extension ``.nc`` is appended to the
      file patterns.
    - The function logs the updates made to the ``outdata_sources``.
    """
    expid = config["general"]["expid"]
    econfig = config["echam"]
    try:
        # NOTE(review): assumes ``namelist_objs`` is itself the parsed
        # namelist.echam; if it is a mapping of namelist file names to parsed
        # objects an extra key lookup is needed -- confirm against the code
        # that populates it.
        namelist = econfig["namelist_objs"]
    except KeyError:  # Namelists not yet loaded...
        namelist = f90nml.read(econfig["namelist_dir"] + "/namelist.echam")
    mvstream_tags = _get_mvstream_tags_from_namelist(namelist)

    # A missing JSBACH section or a missing ``streams`` entry simply means
    # there are no JSBACH tags to filter out.
    jsbach_streams = config.get("jsbach", {}).get("streams") or []
    user_ignore_tags = econfig.get("ignore_tags") or []
    if isinstance(jsbach_streams, str):
        jsbach_streams = [jsbach_streams]
    if isinstance(user_ignore_tags, str):
        user_ignore_tags = [user_ignore_tags]
    # Union instead of "ignore_tags or else jsbach_streams": a user-defined
    # ignore list must not silently stop the JSBACH streams being ignored.
    ignore_these_tags = set(jsbach_streams) | set(user_ignore_tags)

    mvstream_tags = [tag for tag in mvstream_tags if tag not in ignore_these_tags]
    mvstream_dict = {tag: f"{expid}*{tag}" for tag in mvstream_tags}
    if namelist["runctl"].get("out_filetype") == 2:
        # Using NetCDF Outputs:
        mvstream_dict = {k: v + ".nc" for k, v in mvstream_dict.items()}
    logger.info("Updating outdata_sources...")
    for k, v in mvstream_dict.items():
        logger.info(f"{k}: {v}")
    # Create the mapping on first use so a configuration without any
    # predefined ``outdata_sources`` does not raise a KeyError.
    econfig.setdefault("outdata_sources", {}).update(mvstream_dict)
    logger.info("...done!")
pgierz marked this conversation as resolved.
Show resolved Hide resolved
25 changes: 21 additions & 4 deletions src/esm_runscripts/filelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import filecmp
import glob
import hashlib
import importlib
import os
import pathlib
import re
Expand All @@ -11,12 +12,11 @@

import f90nml
import yaml
from loguru import logger

import esm_parser
from loguru import logger

from . import helpers
from . import jinja
from . import helpers, jinja


def rename_sources_to_targets(config):
Expand Down Expand Up @@ -225,7 +225,7 @@ def choose_needed_files(config):
new_sources = new_targets = {}
for category, name in config[model][filetype + "_files"].items():
# TODO: change with user_error()
if not name in config[model][filetype + "_sources"]:
if name not in config[model][filetype + "_sources"]:
logger.error(
"Implementation "
+ name
Expand Down Expand Up @@ -1632,10 +1632,27 @@ def get_movement(config, model, category, filetype, source, target):
sys.exit(42)


def append_namelist_dependent_sources(config):
    """
    Let models with namelist-defined output streams append them to their sources.

    For every entry of ``config["general"]["valid_model_names"]`` plus
    ``"general"`` whose configuration has a truthy ``has_namelist_streams``,
    the module ``esm_runscripts.<model>`` is imported and its
    ``append_namelist_dependent_sources(config)`` function is called.

    Parameters
    ----------
    config : dict
        The full experiment configuration; handed to each model hook.

    Returns
    -------
    dict
        The same ``config`` object that was passed in.

    Notes
    -----
    A model that sets ``has_namelist_streams`` but has no importable
    ``esm_runscripts.<model>`` module, or whose module lacks the hook, is
    logged as an error and skipped; the remaining models are still processed.
    """
    for model in config["general"]["valid_model_names"] + ["general"]:
        if not config[model].get("has_namelist_streams", False):  # Something truthy
            continue
        # Only the import is guarded: an ImportError raised from inside a
        # model's own hook must not be misreported as a missing module.
        try:
            model_module = importlib.import_module(f"esm_runscripts.{model}")
        except ImportError:
            logger.error(
                f"Model {model} specifies that it has namelist streams, but there is no module to import to handle that..."
            )
            continue  # keep going...
        # Important: <model>.py needs to define something that is called
        # append_namelist_dependent_sources
        model_hook = getattr(model_module, "append_namelist_dependent_sources", None)
        if model_hook is None:
            logger.error(
                f"Model {model} specifies that it has namelist streams, but esm_runscripts.{model} does not define append_namelist_dependent_sources..."
            )
            continue  # keep going...
        model_hook(config)
    return config


def assemble(config):
config = complete_all_file_movements(config)
config = rename_sources_to_targets(config)
config = choose_needed_files(config)
config = append_namelist_dependent_sources(config)
config = complete_targets(config)
config = complete_sources(config)
config = reuse_sources(config)
Expand Down
5 changes: 1 addition & 4 deletions src/esm_runscripts/tidy.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
import filecmp
import os
import pathlib
import re
import shutil
import sys
import time

import psutil
from loguru import logger

from . import coupler, database_actions, helpers, logfiles
from . import helpers, logfiles
from .filelists import copy_files, resolve_symlinks


Expand Down
Loading