From 8fc038f46a6db2c1f04e10e6b7c1b0e067afd305 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20S=C3=A9n=C3=A9si?= Date: Mon, 31 May 2021 07:07:35 +0200 Subject: [PATCH] Handle IPSL-CM6 (the feature won't actually work without #1124) --- doc/develop/fixing_data.rst | 133 ++++++--- doc/develop/index.rst | 2 +- doc/quickstart/configure.rst | 94 ++++++- doc/quickstart/find_data.rst | 68 +++-- doc/quickstart/index.rst | 2 +- .../native6-ipsl-cm6-mappings.yml | 256 ++++++++++++++++++ esmvalcore/cmor/_fixes/native6/ipsl_cm6.py | 119 ++++++++ esmvalcore/config-developer.yml | 11 +- esmvalcore/config-user.yml | 24 ++ esmvalcore/preprocessor/_io.py | 2 +- 10 files changed, 628 insertions(+), 83 deletions(-) create mode 100644 esmvalcore/_config/variable_details/native6-ipsl-cm6-mappings.yml create mode 100644 esmvalcore/cmor/_fixes/native6/ipsl_cm6.py diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst index 6dbe5fe96b..dfae2f00ef 100644 --- a/doc/develop/fixing_data.rst +++ b/doc/develop/fixing_data.rst @@ -1,33 +1,40 @@ .. _fixing_data: -*********** -Dataset fix -*********** - -Some (model) datasets contain (known) errors that would normally prevent them -from being processed correctly by the ESMValCore. The errors can be in -the metadata describing the dataset and/or in the actual data. -Typical examples of such errors are missing or wrong attributes (e.g. -attribute ''units'' says 1e-9 but data are actually in 1e-6), missing or -mislabeled coordinates (e.g. ''lev'' instead of ''plev'' or missing +****************************************** +Adapting to data sources +****************************************** + +The baseline case for ESMValTool input data is CMOR fully compliant +data that is read using Iris load fuction. ESMValTool also allows for +some departures with compliance (see +:ref:`cmor_check_strictness`). 
Beyond that situation, some datasets +(either model or observations) contain (known) errors that would +normally prevent them from being processed. The issues can be in the +metadata describing the dataset and/or in the actual data. Typical +examples of such errors are missing or wrong attributes (e.g. +attribute ''units'' says 1e-9 but data are actually in 1e-6), missing +or mislabeled coordinates (e.g. ''lev'' instead of ''plev'' or missing coordinate bounds like ''lat_bnds'') or problems with the actual data -(e.g. cloud liquid water only instead of sum of liquid + ice as specified by the CMIP data request). +(e.g. cloud liquid water only instead of sum of liquid + ice as +specified by the CMIP data request). -The ESMValCore can apply on the fly fixes to datasets that have -known errors that can be fixed automatically. - -.. note:: - **CMORization as a fix**. - Support for many observational and reanalysis datasets is implemented through - :ref:`CMORizer scripts in the ESMValTool `. - However, it is also possible to add support for a dataset that is not part of - a CMIP data request by implementing fixes for it. - This is particularly useful for large datasets, where keeping a copy of both - the original and CMORized dataset is not feasible. - See `Natively supported non-CMIP datasets`_ for a list of currently supported - datasets. +As an extreme case, some other data sources are simply not NetCDF +files and must go through another data load function. +The ESMValCore can apply on the fly fixes to such datasets when +issues can be fixed automatically. This is implemented for a set +of `Natively supported non-CMIP datasets`_. The following sections provide +details on how to design such fixes. +.. note:: + + **CMORizer scripts**. Support for many observational and reanalysis + datasets is also possible through a priori reformatting by + :ref:`CMORizer scripts in the ESMValTool `, + which are mostly relevant for datasets of small volume. + +..
_fix_structure: + Fix structure ============= @@ -326,30 +333,68 @@ strictness to the highest: Natively supported non-CMIP datasets ==================================== -Fixed datasets are supported through the ``native6`` project. -Put the files containing the data in the directory that you have configured -for the ``native6`` project in your :ref:`user configuration file`, in a -subdirectory called ``Tier{tier}/{dataset}/{version}/{frequency}/{short_name}``. -Replace the items in curly braces by the values used in the variable/dataset -definition in the :ref:`recipe `. -Below is a list of datasets currently supported. +Some fixed datasets and native models formats are supported through +the ``native6`` project. -ERA5 ----- +.. _fixing_native_models: -- Supported variables: ``clt``, ``evspsbl``, ``evspsblpot``, ``mrro``, ``pr``, ``prsn``, ``ps``, ``psl``, ``ptype``, ``rls``, ``rlds``, ``rsds``, ``rsdt``, ``rss``, ``uas``, ``vas``, ``tas``, ``tasmax``, ``tasmin``, ``tdps``, ``ts``, ``tsn`` (``E1hr``/``Amon``), ``orog`` (``fx``) -- Tier: 3 +Native models : IPSL-CM6,... +----------------------------- -MSWEP ------ +The following models are natively supported through the procedure +described above (:ref:`fix_structure`) and at +:ref:`configure_native_models`: -- Supported variables: ``pr`` -- Supported frequencies: ``mon``, ``day``, ``3hr``. -- Tier: 3 + - **IPSL-CM6** : both output formats (i.e. the ``Output`` and the + ``Analyse / Time series`` formats) are supported, and should be + configured in recipes as e.g.: -For example for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location. + .. code-block:: yaml -.. note:: - For monthly data (V220), the data must be postfixed with the date, i.e. 
rename ``global_monthly_050deg.nc`` to ``global_monthly_050deg_197901-201710.nc`` + datasets: + - {simulation: CM61-LR-hist-03.1950, exp: piControl, freq: Analyse/TS_MO, + account: p86caub, status: PROD, dataset: IPSL-CM6, project: native6 } + - {simulation: CM61-LR-hist-03.1950, exp: historical, freq: Output/MO, + account: p86caub, status: PROD, dataset: IPSL-CM6, project: native6 } + + The ``Output`` format is an example of a case where variables are + grouped in multi-variable files, which name cannot be computed + directly from datasets attributes alone but requires a mapping + file. These multi-variable files must also undergo some data + selection, which may involve an external process for performance + purpose. + + The ``config-developer.yaml`` section for configuring IPSL-CM6 is + :ref:`illustrated here ` + + + + +ERA5 and MSWEP datasets +----------------------- +Put the files containing the data in the +directory that you have configured for the ``native6`` project in your +:ref:`user configuration file`, in a subdirectory called +``Tier{tier}/{dataset}/{version}/{frequency}/{short_name}``. Replace +the items in curly braces by the values used in the variable/dataset +definition in the :ref:`recipe `. Below is a list of +datasets currently supported : + + - **ERA5** + + - Supported variables: ``clt``, ``evspsbl``, ``evspsblpot``, ``mrro``, ``pr``, ``prsn``, ``ps``, ``psl``, ``ptype``, ``rls``, ``rlds``, ``rsds``, ``rsdt``, ``rss``, ``uas``, ``vas``, ``tas``, ``tasmax``, ``tasmin``, ``tdps``, ``ts``, ``tsn`` (``E1hr``/``Amon``), ``orog`` (``fx``) + - Tier: 3 + + - **MSWEP** + + - Supported variables: ``pr`` + - Supported frequencies: ``mon``, ``day``, ``3hr``. + - Tier: 3 + + For example for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location. + + .. note:: + + For monthly data (V220), the data must be postfixed with the date, i.e. 
rename ``global_monthly_050deg.nc`` to ``global_monthly_050deg_197901-201710.nc`` -For more info: http://www.gloh2o.org/ + For more info: http://www.gloh2o.org/ diff --git a/doc/develop/index.rst b/doc/develop/index.rst index e10a5143f0..b65331ed65 100644 --- a/doc/develop/index.rst +++ b/doc/develop/index.rst @@ -10,5 +10,5 @@ features. :maxdepth: 1 Preprocessor function - Dataset fix + Adapting to data sources Deriving a variable diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst index cd8a92eca9..2d2dea028a 100644 --- a/doc/quickstart/configure.rst +++ b/doc/quickstart/configure.rst @@ -103,7 +103,7 @@ with explanations in a commented line above each option: OBS: ~/obs_inputpath default: ~/default_inputpath - # Directory structure for input data: [default]/BADC/DKRZ/ETHZ/etc + # Directory structure for input data: [default]/BADC/DKRZ/ETHZ/IPSL/etc # See config-developer.yml for definitions. drs: CMIP5: default @@ -176,8 +176,10 @@ It will be installed along with ESMValCore and can also be viewed on GitHub: `esmvalcore/config-developer.yml `_. This configuration file describes the file system structure and CMOR tables for several -key projects (CMIP6, CMIP5, obs4mips, OBS6, OBS) on several key machines (e.g. BADC, CP4CDS, DKRZ, -ETHZ, SMHI, BSC). CMIP data is stored as part of the Earth System Grid +key projects (CMIP6, CMIP5, obs4mips, OBS6, OBS) on several key machines (e.g. +BADC, CP4CDS, DKRZ, ETHZ, SMHI, BSC, IPSL), and for native output data for some +models (IPSL, ... see :ref:`configure_native_models`) . +CMIP data is stored as part of the Earth System Grid Federation (ESGF) and the standards for file naming and paths to files are set out by CMOR and DRS. For a detailed description of these standards and their adoption in ESMValCore, we refer the user to :ref:`CMOR-DRS` section where we @@ -260,9 +262,33 @@ your data please see :ref:`CMOR-DRS`. 
Preprocessor output files ------------------------- -The filename to use for preprocessed data is configured in a similar manner -using ``output_file``. Note that the extension ``.nc`` (and if applicable, -a start and end time) will automatically be appended to the filename. +The filename to use for preprocessed data is configured in a similar +manner using ``output_file``, which can be either a single value or a +dictionary of values. + +This latter case is useful for projects which gather widely varying cases +with varying sets of dataset attributes, such as the native6 project : + +.. _example_IPSL_config: + +.. code-block:: yaml + + native6: + ... + input_dir: + default: 'Tier{tier}/{dataset}/{latestversion}/{frequency}/{short_name}' + IPSL: '{account}/{model}/{status}/{exp}/{simulation}/{igcm_dir}/Analyse/{freq}' + input_file: + default: '*.nc' + IPSL: '{simulation}_*_{ipsl_varname}.nc' + output_file: + default: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}' + IPSL: '{account}_{model}_{status}_{exp}_{simulation}_{short_name}' + ... + + +Note that the extension ``.nc`` (and if applicable, a start and end +time) will automatically be appended to the filename. .. _cmor_table_configuration: @@ -289,6 +315,62 @@ related to CMOR table settings available: to get the name of the file containing the ``mip`` table. Defaults to the value provided in ``cmor_type``. +.. _configure_native_models: + +Configuring native models and observation data sets +---------------------------------------------------- + +ESMValTool can take full advantage of the ability to configure +ESMValCore for handling native model output formats and specific +observation data sets without preliminary reformatting.
Such a +configuration involves the following steps : + + - allowing ESMValTool to locate the data files : + + - entry ``native6`` of ``config-developer.yml`` should be + complemented with sub-entries for ``input_dir``, ``input_file`` + and ``output_file`` that go under a new key representing the + data organization (such as ``IPSL``), and these sub-entries can + use an arbitrary list of ``{placeholders}``. Example : + + .. code-block:: yaml + + native6: + cmor_strict: false + input_dir: + default: 'Tier{tier}/{dataset}/{latestversion}/{frequency}/{short_name}' + IPSL: '{account}/{model}/{status}/{exp}/{simulation}/{dir}/{freq}' + input_file: + default: '*.nc' + IPSL: + - '{simulation}_*_{ipsl_varname}.nc' + - '{simulation}_*_{group}.nc' + output_file: + default: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}' + IPSL: '{account}_{model}_{status}_{exp}_{simulation}_{freq}_{short_name}' + cmor_type: 'CMIP6' + cmor_default_table_prefix: 'CMIP6_' + + + - if necessary, provide a so-called ``mapping file`` which allows + associating a given variable short_name used in a recipe, such + as ``tas``, with a dictionary of placeholder values; these + values will be used at run time, with ``input_dir`` and + ``input_file`` patterns, to compute the actual filename to load + for that variable; such a file is looked for under pattern + ``native6-*.yml`` in two places : in the source code, at + ``ESMValCore/esmvalcore/_config/variable_details/`` and in user + space, at ``~/.esmvaltool/variable_details``. See here + :download:`an example of such a file for IPSL-CM6 + <../../esmvalcore/_config/variable_details/native6-ipsl-cm6-mappings.yml>`. + All such files in these two places are sorted and loaded in + sequence, first for the code location, second for the + user-space location. + + - ensuring that ESMValTool gets the right metadata and data out of + your data files : this is described at :ref:`fixing_data` + + ..
_config-ref: References configuration file diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index 5e823da409..eb968474d9 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -1,7 +1,7 @@ .. _findingdata: ************ -Finding data +Input data ************ Overview @@ -15,10 +15,13 @@ the right data. We will detail below the data finding and retrieval process and the input the user needs to specify, giving examples on how to use the data finding routine under different scenarios. +Data types +========== + .. _CMOR-DRS: -CMIP data - CMOR Data Reference Syntax (DRS) and the ESGF -========================================================= +CMIP data +--------------------------------------------------------- CMIP data is widely available via the Earth System Grid Federation (`ESGF `_) and is accessible to users either via download from the ESGF portal or through the ESGF data nodes hosted @@ -45,6 +48,39 @@ From the ESMValTool user perspective the number of data input parameters is optimized to allow for ease of use. We detail this procedure in the next section. +Native model data +--------------------------------------------------------- +Support for native model data is straightforward using the basic +:ref:`ESMValCore fix procedure <fixing_data>` and is already implemented +for some models :ref:`as described here <fixing_native_models>` + +Observational data +--------------------------------------------------------- +Part of the observational data is retrieved in the same manner as CMIP data, for example +using the ``OBS`` root path set to: + + .. code-block:: yaml + + OBS: /gws/nopw/j04/esmeval/obsdata-v2 + +and the dataset: + + .. code-block:: yaml + + - {dataset: ERA-Interim, project: OBS, type: reanaly, version: 1, start_year: 2014, end_year: 2015, tier: 3} + +in ``recipe.yml`` in ``datasets`` or ``additional_datasets``, the rules set in +CMOR-DRS_ are used again and the file will be automatically found: + +..
code-block:: + + /gws/nopw/j04/esmeval/obsdata-v2/Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_1_Amon_ta_201401-201412.nc + +Since observational data are organized in Tiers depending on their level of +public availability, the ``default`` directory must be structured accordingly +with sub-directories ``TierX`` (``Tier1``, ``Tier2`` or ``Tier3``), even when +``drs: default``. + .. _data-retrieval: Data retrieval @@ -231,32 +267,6 @@ and finally, using the file naming definition from CMOR-DRS_ find the file: .. _observations: -Observational data -================== -Observational data is retrieved in the same manner as CMIP data, for example -using the ``OBS`` root path set to: - - .. code-block:: yaml - - OBS: /gws/nopw/j04/esmeval/obsdata-v2 - -and the dataset: - - .. code-block:: yaml - - - {dataset: ERA-Interim, project: OBS, type: reanaly, version: 1, start_year: 2014, end_year: 2015, tier: 3} - -in ``recipe.yml`` in ``datasets`` or ``additional_datasets``, the rules set in -CMOR-DRS_ are used again and the file will be automatically found: - -.. code-block:: - - /gws/nopw/j04/esmeval/obsdata-v2/Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_1_Amon_ta_201401-201412.nc - -Since observational data are organized in Tiers depending on their level of -public availability, the ``default`` directory must be structured accordingly -with sub-directories ``TierX`` (``Tier1``, ``Tier2`` or ``Tier3``), even when -``drs: default``. 
Data loading ============ diff --git a/doc/quickstart/index.rst b/doc/quickstart/index.rst index 4f9887f76f..2fb022bfda 100644 --- a/doc/quickstart/index.rst +++ b/doc/quickstart/index.rst @@ -6,7 +6,7 @@ Getting started Installation Configuration - Finding data + Input data Installed recipes Running Output diff --git a/esmvalcore/_config/variable_details/native6-ipsl-cm6-mappings.yml b/esmvalcore/_config/variable_details/native6-ipsl-cm6-mappings.yml new file mode 100644 index 0000000000..cfd5f419d9 --- /dev/null +++ b/esmvalcore/_config/variable_details/native6-ipsl-cm6-mappings.yml @@ -0,0 +1,256 @@ +# Mapping, for IPSL-CM6 output formats 'Analyse' and 'Output', between a +# CMOR variable name and the labels used by ESMValTool to find the +# corresponding file, and the corresponding variable in the file +# +# For format 'Analyse', the config-developer.yml file tells +# ESMValTool to use key 'ipsl_varname' for building the filename, +# while for format 'Output' it specifies to use key 'group' +# +# Specifying 'dir' here avoids having to specify it in +# datasets definitions +# +# Key 'use_cdo' selects whether CDO will be invoked for +# selecting a variable in a multi-variable file. This generally allows +# for smaller overall load time. But because CDO has a licence which is +# not compliant with ESMValTool licence policy, the default +# configuration is to avoid using it. You may use customized settings +# by installing a modified version of this file as +# ~/.esmvaltool/variable_details/native6-ipsl-cm6-*.yml +# +# Keys scale and offset can be used for handling any unit issue that +# is not dealt with automatically by the udunits layer in ESMValTool +# (and which makes use of the units metadata in input files).
In that +# case, scaling and offsetting are applied automatically by method +# fix_data of class AllVars in cmor/_fixes/native6/ipsl_cm6.py +# +# The main key below, 'IPSL-CM6', is the one to use as a value for +# attribute 'dataset' in the 'datasets' entry of recipes; it matches +# the module name 'ipsl_cm6.py' in 'cmor/_fixes/native6/ipsl_cm6.py' +# +--- +# A series of shortcuts for repetitive settings +ShortCuts: + General: &gene {model: IPSLCM6, use_cdo: false} + ATM3DVARS: &atm3dvars {group: histmthNMC, dir: ATM, <<: *gene} + Atmvars: &atmvars {group: histmth, dir: ATM, <<: *gene} + SrfVars: &srfvars {group: sechiba_history, dir: SRF, <<: *gene} + StoVars: &stovars {group: stomate_history, dir: SBG, <<: *gene} + StiVars: &stivars {group: stomate_ipcc_history, dir: SBG, <<: *gene} + SechVars: &sechvars {group: sechiba_history, dir: SBG, <<: *gene} + OceTVars: &ocetvars {group: grid_T, dir: OCE, <<: *gene} + OceUVars: &oceuvars {group: grid_U, dir: OCE, <<: *gene} + OceVVars: &ocevvars {group: grid_V, dir: OCE, <<: *gene} + OceDvars: &ocedvars {group: diaptr, dir: OCE, <<: *gene} + OcePtr: &oceptr {group: ptrc_T, dir: BGC, <<: *gene} + IceVars: &icevars {group: icemod, dir: ICE, <<: *gene} + + +IPSL-CM6: + # ======================================================================================== + Amon: + # ======================================================================================== + # ATM 3D Variables + ta: {ipsl_varname: ta, <<: *atm3dvars} + ua: {ipsl_varname: ua, <<: *atm3dvars} + va: {ipsl_varname: va, <<: *atm3dvars} + zg: {ipsl_varname: zg, <<: *atm3dvars} + hus: {ipsl_varname: hus, <<: *atm3dvars} + hur: {ipsl_varname: hur, <<: *atm3dvars} + + # ATM general variables + pr: {ipsl_varname: precip, <<: *atmvars} + psl: {ipsl_varname: slp, <<: *atmvars} + tas: {ipsl_varname: t2m, <<: *atmvars} + ts: {ipsl_varname: tsol, <<: *atmvars} + huss: {ipsl_varname: q2m, <<: *atmvars} + uas: {ipsl_varname: u10m, <<: *atmvars} + "vas": {ipsl_varname: v10m, <<: *atmvars} + sfcWind:
{ipsl_varname: wind10m, <<: *atmvars} + hurs: {ipsl_varname: rh2m, <<: *atmvars} + + # ATM general variables + precip: {ipsl_varname: precip, <<: *atmvars} + prw: {ipsl_varname: prw, <<: *atmvars} + slp: {ipsl_varname: slp, <<: *atmvars} + t2m: {ipsl_varname: t2m, <<: *atmvars} + q2m: {ipsl_varname: q2m, <<: *atmvars} + u10m: {ipsl_varname: u10m, <<: *atmvars} + v10m: {ipsl_varname: v10m, <<: *atmvars} + wind10m: {ipsl_varname: wind10m, <<: *atmvars} + + # -> Turbulent fluxes + flat: {ipsl_varname: flat, <<: *atmvars, scale: -1.} + sens: {ipsl_varname: sens, <<: *atmvars, scale: -1.} + taux: {ipsl_varname: taux, <<: *atmvars} + tauy: {ipsl_varname: tauy, <<: *atmvars} + + # -> Clouds + cldt: {ipsl_varname: cldt, <<: *atmvars, scale: 100.} + + # -> Radiative down at TOA + SWdnTOA: {ipsl_varname: SWdnTOA, <<: *atmvars} + + # -> Radiative down at TOA + topl: {ipsl_varname: topl, <<: *atmvars} + SWupTOA: {ipsl_varname: SWupTOA, <<: *atmvars} + topl0: {ipsl_varname: topl0, <<: *atmvars} + SWupTOAclr: {ipsl_varname: SWupTOAclr, <<: *atmvars} + + # -> Radiative up at Surface + LWupSFC: {ipsl_varname: LWupSFC, <<: *atmvars} + SWupSFC: {ipsl_varname: SWupSFC, <<: *atmvars} + SWupSFcclr: {ipsl_varname: SWupSFcclr, <<: *atmvars} + LWupSFcclr: {ipsl_varname: LWupSFcclr, <<: *atmvars} + + # -> Radiative down at Surface + LWdnSFC: {ipsl_varname: LWdnSFC, <<: *atmvars} + SWdnSFC: {ipsl_varname: SWdnSFC, <<: *atmvars} + LWdnSFcclr: {ipsl_varname: LWdnSFcclr, <<: *atmvars} + SWdnSFcclr: {ipsl_varname: SWdnSFcclr, <<: *atmvars} + + # -- P - E + hflsevap: {ipsl_varname: hfls, <<: *atmvars, scale: -4.0000e-07} + + # -> Turbulent fluxes + hfls: {ipsl_varname: flat, <<: *atmvars, scale: -1.} + hfss: {ipsl_varname: sens, <<: *atmvars, scale: -1.} + tauu: {ipsl_varname: taux, <<: *atmvars} + tauv: {ipsl_varname: tauy, <<: *atmvars} + + # -> Clouds + clt: {ipsl_varname: cldt, <<: *atmvars, scale: 100.} + cldl: {ipsl_varname: cldl, <<: *atmvars, scale: 100.} + cldm: {ipsl_varname: cldm, <<: 
*atmvars, scale: 100.} + cldh: {ipsl_varname: cldh, <<: *atmvars, scale: 100.} + + # -> Radiative up at TOA + rlut: {ipsl_varname: topl, <<: *atmvars} + rsut: {ipsl_varname: SWupTOA, <<: *atmvars} + rlutcs: {ipsl_varname: topl0, <<: *atmvars} + rsutcs: {ipsl_varname: SWupTOAclr, <<: *atmvars} + + # -> Radiative down at TOA + rsdt: {ipsl_varname: SWdnTOA, <<: *atmvars} + + # -> Radiative up at Surface + rlus: {ipsl_varname: LWupSFC, <<: *atmvars} + rsus: {ipsl_varname: SWupSFC, <<: *atmvars} + rsuscs: {ipsl_varname: SWupSFcclr, <<: *atmvars} + rluscs: {ipsl_varname: LWupSFcclr, <<: *atmvars} + + # -> Radiative down at Surface + rlds: {ipsl_varname: LWdnSFC, <<: *atmvars} + rsds: {ipsl_varname: SWdnSFC, <<: *atmvars} + rldscs: {ipsl_varname: LWdnSFcclr, <<: *atmvars} + rsdscs: {ipsl_varname: SWdnSFcclr, <<: *atmvars} + + # ======================================================================================== + Lmon: + # ======================================================================================== + # SRF -- Land surface - ORCHIDEE + + # --> !!! This will not stay in the param file !!! 
-------------------- + fluxlat: {ipsl_varname: fluxlat, <<: *srfvars} + fluxsens: {ipsl_varname: fluxsens, <<: *srfvars} + albnir: {ipsl_varname: alb_nir, <<: *srfvars} + albvis: {ipsl_varname: alb_vis, <<: *srfvars} + tair: {ipsl_varname: tair, <<: *srfvars} + swdown: {ipsl_varname: swdown, <<: *srfvars} + lwdown: {ipsl_varname: lwdown, <<: *srfvars} + transpir: {ipsl_varname: transpir, <<: *srfvars} + evapnu: {ipsl_varname: evapnu, <<: *srfvars} + es: {ipsl_varname: evapnu, <<: *srfvars} + inter: {ipsl_varname: inter, <<: *srfvars} + subli: {ipsl_varname: subli, <<: *srfvars} + evap: {ipsl_varname: evap, <<: *srfvars} + Qs: {ipsl_varname: Qs, <<: *srfvars} + runoff: {ipsl_varname: runoff, <<: *srfvars} + mrros: {ipsl_varname: runoff, <<: *srfvars} + drainage: {ipsl_varname: drainage, <<: *srfvars} + mrrob: {ipsl_varname: drainage, <<: *srfvars} + frac_snow: {ipsl_varname: frac_snow, <<: *srfvars} + snow: {ipsl_varname: snow, <<: *srfvars} + snw_land: {ipsl_varname: snow, <<: *srfvars} + maint_resp: {ipsl_varname: maint_resp, <<: *srfvars} + growth_resp: {ipsl_varname: growth_resp, <<: *srfvars} + hetero_resp: {ipsl_varname: hetero_resp, <<: *srfvars} + maintresp: {ipsl_varname: maint_resp, <<: *srfvars} + growthresp: {ipsl_varname: growth_resp, <<: *srfvars} + heteroresp: {ipsl_varname: hetero_resp, <<: *srfvars} + nee: {ipsl_varname: nee, <<: *srfvars} + + # SBG + total_soil_carb: {ipsl_varname: TOTAL_SOIL_CARB, <<: *stovars} + totalsoilcarb: {ipsl_varname: TOTAL_SOIL_CARB, <<: *stovars} + maxvegetfrac: {ipsl_varname: maxvegetfrac, <<: *sechvars} + vegetfrac: {ipsl_varname: vegetfrac, <<: *sechvars} + lai: {ipsl_varname: lai, <<: *stivars} + cfracgpp: {ipsl_varname: gpp, <<: *stivars} + + # -> alias for the obs + gpptot: {ipsl_varname: gpp, <<: *stivars} + GPP: {ipsl_varname: gpp, <<: *sechvars, scale: 1.e-03} + Contfrac: {ipsl_varname: Contfrac, <<: *sechvars} + + # ============================================================================== + Omon: + # 
============================================================================ + # OCE + tos: {ipsl_varname: tos, <<: *ocetvars} + sos: {ipsl_varname: sos, <<: *ocetvars} + thetao: {ipsl_varname: thetao, <<: *ocetvars} + so: {ipsl_varname: so, <<: *ocetvars} + zos: {ipsl_varname: zos, <<: *ocetvars} + mlotst: {ipsl_varname: mldr10_1, <<: *ocetvars} + mlddt02: {ipsl_varname: mld_dt02, <<: *ocetvars} + hc300: {ipsl_varname: hc300, <<: *ocetvars, scale: 1.e-09} + wfo: {ipsl_varname: wfo, <<: *ocetvars} + + # ------------------------------------------------------------ # + + # Aliases to the zonal average (computed on the x axis of the ORCA grid) # + zotemglo: {ipsl_varname: zotemglo, <<: *ocedvars} + zotempac: {ipsl_varname: zotempac, <<: *ocedvars} + zotematl: {ipsl_varname: zotematl, <<: *ocedvars} + zotemind: {ipsl_varname: zotemind, <<: *ocedvars} + zosalglo: {ipsl_varname: zosalglo, <<: *ocedvars} + zosalpac: {ipsl_varname: zosalpac, <<: *ocedvars} + zosalatl: {ipsl_varname: zosalatl, <<: *ocedvars} + zosalind: {ipsl_varname: zosalind, <<: *ocedvars} + zomsfglo: {ipsl_varname: zomsfglo, <<: *ocedvars} + zomsfpac: {ipsl_varname: zomsfpac, <<: *ocedvars} + zomsfatl: {ipsl_varname: zomsfatl, <<: *ocedvars} + zomsfind: {ipsl_varname: zomsfind, <<: *ocedvars} + + # --------------------------------------------------------------- # + + # Aliases to the old IGCM_OUT names + # (to take advantage of offset, scale and filenameVar) -- # + sosstsst: {ipsl_varname: sosstsst, <<: *ocetvars} + sosaline: {ipsl_varname: sosaline, <<: *ocetvars} + votemper: {ipsl_varname: votemper, <<: *ocetvars} + vosaline: {ipsl_varname: vosaline, <<: *ocetvars} + mldr10_3: {ipsl_varname: mldr10_3, <<: *ocetvars} + somx3010: {ipsl_varname: somx3010, <<: *ocetvars} + sohtc300: {ipsl_varname: sohtc300, <<: *ocetvars, scale: 1.e-09} + mld_dt02: {ipsl_varname: mld_dt02, <<: *ocetvars} + + # -- Wind stress curl + tauuo: {ipsl_varname: tauuo, <<: *oceuvars} + tauvo: {ipsl_varname: tauvo, <<: *oceuvars} + 
+ # BGC -> Biogeochemistry + NO3: {ipsl_varname: NO3, <<: *oceptr} + PO4: {ipsl_varname: PO4, <<: *oceptr} + Si: {ipsl_varname: Si, <<: *oceptr} + O2: {ipsl_varname: O2, <<: *oceptr} + + # ==================================================================================== + SImon: + # ==================================================================================== + # ICE + sic: {ipsl_varname: siconc, <<: *icevars, scale: 100.} + sit: {ipsl_varname: sithic, <<: *icevars} + sivolu: {ipsl_varname: sivolu, <<: *icevars} + siconc: {ipsl_varname: siconc, <<: *icevars, scale: 100.} + sithic: {ipsl_varname: sithic, <<: *icevars} diff --git a/esmvalcore/cmor/_fixes/native6/ipsl_cm6.py b/esmvalcore/cmor/_fixes/native6/ipsl_cm6.py new file mode 100644 index 0000000000..ede3202339 --- /dev/null +++ b/esmvalcore/cmor/_fixes/native6/ipsl_cm6.py @@ -0,0 +1,119 @@ +"""Fixes for IPSLCM6 TS output format.""" +import logging +import os +import time + +from ..fix import Fix +from ..shared import add_scalar_height_coord + +logger = logging.getLogger(__name__) + +# The key used in mappings.yml file for providing the +# variable name (in NetCDF file) that match the CMOR variable name +KEY_FOR_VARNAME = "ipsl_varname" + + +class AllVars(Fix): + """Fixes for all IPSLCM variables.""" + def fix_file(self, filepath, output_dir): + """Select IPSLCM variable in filepath, by calling CDO, if relevant. + + This is done only if input file is a multi-variable one. This is + diagnosed by searching in the input filepathame for the mapping + value for key 'group'. + + In such cases, it is worth to use an external tool for + filtering, at least until Iris loads fast (which is not the case + up to, and including, V3.0.2) + + However, we take care of ESMValTool policy re. 
dependencies licence + """ + if "_" + self.var_mapping.get("group", + "non-sense") + ".nc" not in filepath: + # No need to filter the file + logger.debug("In ipsl-cm6.py : not filtering for %s", filepath) + return filepath + + if not self.var_mapping.get("use_cdo", False): + # The configuration developer doesn't provide CDO, while ESMValTool + # licence policy doesn't allow to include it in dependencies + # Or he considers that plain Iris load is quick enough for + # that file + logger.debug("In ipsl-cm6.py : CDO not activated for %s", filepath) + return filepath + + # Proceed with CDO selvar + varname = self.var_mapping.get(KEY_FOR_VARNAME, self.vardef.short_name) + alt_filepath = filepath.replace(".nc", "_cdo_selected.nc") + outfile = self.get_fixed_filepath(output_dir, alt_filepath) + command = "cdo -selvar,%s %s %s" % (varname, filepath, outfile) + tim1 = time.time() + logger.debug("Using CDO for selecting %s in %s", varname, filepath) + os.system(command) + logger.debug("CDO selection done in %.2f seconds", time.time() - tim1) + return outfile + + def fix_metadata(self, cubes): + """Fix metadata for any IPSLCM variable + filter out other variables. + + Fix the name of the time coordinate, which is called time_counter + in the original file. 
+ + Remove standard_name 'time' in auxiliary time coordinates + """ + logger.debug("Fixing metadata for ipslcm_cm6") + + varname = self.var_mapping.get(KEY_FOR_VARNAME, self.vardef.short_name) + cube = self.get_cube_from_list(cubes, varname) + cube.var_name = self.vardef.short_name + + # Need to degrade auxiliary time coordinates, because some + # iris function does not support to have more than one + # coordinate with standard_name='time' + for coordinate in cube.coords(dim_coords=False): + if coordinate.standard_name == 'time': + coordinate.standard_name = '' + + # Fix variable name for time_counter + for coordinate in cube.coords(dim_coords=True): + if coordinate.var_name == 'time_counter': + coordinate.var_name = 'time' + + return [cube] + + def fix_data(self, cube): + """Apply fixes to the data of the cube. + + Here : scaling and offset according to mapping. + + But needs to be checked vs ESMValTool automatic unit change + when units metadat is present and correct + """ + mapping = self.var_mapping + metadata = cube.metadata + if "scale" in mapping: + cube *= mapping["scale"] + if "offset" in mapping: + cube += mapping["offset"] + cube.metadata = metadata + return cube + + +class Tas(Fix): + """Fixes for ISPLCM 2m temperature.""" + def fix_metadata(self, cubes): + """Add height2m.""" + varname = self.var_mapping.get(KEY_FOR_VARNAME, self.vardef.short_name) + cube = self.get_cube_from_list(cubes, varname) + add_scalar_height_coord(cube) + return cubes + + +class Huss(Fix): + """Fixes for ISPLCM 2m specific humidity.""" + def fix_metadata(self, cubes): + """Add height2m.""" + varname = self.var_mapping.get(KEY_FOR_VARNAME, self.vardef.short_name) + cube = self.get_cube_from_list(cubes, varname) + add_scalar_height_coord(cube) + return cubes diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index a0a242e6a1..9b92a20398 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -112,6 +112,7 @@ CMIP3: 
default: '/' BADC: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{short_name}/{ensemble}/{latestversion}' DKRZ: '{exp}/{modeling_realm}/{frequency}/{short_name}/{dataset}/{ensemble}' + IPSL: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{ensemble}/{short_name}/{version}/{short_name}' input_file: '{short_name}_*.nc' output_file: '{project}_{institute}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}_{start_year}-{end_year}' cmor_type: 'CMIP3' @@ -192,8 +193,12 @@ native6: cmor_strict: false input_dir: default: 'Tier{tier}/{dataset}/{latestversion}/{frequency}/{short_name}' + IPSL: '{account}/{model}/{status}/{exp}/{simulation}/{dir}/{freq}' input_file: default: '*.nc' + IPSL: + - '{simulation}_*_{ipsl_varname}.nc' + - '{simulation}_*_{group}.nc' output_file: '{project}_{dataset}_{type}_{version}_{mip}_{short_name}' cmor_type: 'CMIP6' cmor_default_table_prefix: 'CMIP6_' @@ -203,7 +208,11 @@ obs4mips: input_dir: default: 'Tier{tier}/{dataset}' RCAST: '/' - input_file: '{short_name}_{dataset}_{level}_{version}_*.nc' + IPSL: '{realm}/{short_name}/{freq}/{grid}/{institute}/${dataset}/{latest_version}' + input_file: + default: '{short_name}_{dataset}_{level}_{version}_*.nc' + RCAST: '{short_name}_{dataset}_{level}_{version}_*.nc' + IPSL: '{short_name}_obs4MIPS_{dataset}_{level}_{version}_*.nc' output_file: '{project}_{dataset}_{level}_{version}_{short_name}' cmor_type: 'CMIP6' cmor_path: 'obs4mips' diff --git a/esmvalcore/config-user.yml b/esmvalcore/config-user.yml index 257b358b06..05ed43d8b9 100644 --- a/esmvalcore/config-user.yml +++ b/esmvalcore/config-user.yml @@ -106,3 +106,27 @@ profile_diagnostic: false # CMIP6: ETHZ # CMIP5: ETHZ # CMIP3: ETHZ + +# Site-specific entries: IPSL +# Uncomment the lines below to locate data on Ciclad at IPSL +#rootpath: +# native6: /thredds/tgcc/store +# CMIP5: /bdd/CMIP5/output +# CMIP6: /bdd/CMIP6 +# CMIP3: /bdd/CMIP3 +# CORDEX: /bdd/CORDEX/output +# obs4mips: /bdd/obs4MIPS/obs-CFMIP/observations +# ana4mips: 
/not_yet +# OBS: /not_yet +# OBS6: /not_yet +# RAWOBS: /not_yet +#drs: +# native6: IPSL +# CMIP6: DKRZ +# CMIP5: DKRZ +# CMIP3: IPSL +# CORDEX: BADC +# obs4mips: IPSL +# ana4mips: default +# OBS: not_yet +# OBS6: not_yet diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 938e4b6f2e..d4a6f3e86d 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -117,8 +117,8 @@ def load(file, callback=None): category=UserWarning, module='iris', ) - raw_cubes = iris.load_raw(file, callback=callback) + logger.debug("Done with loading") if not raw_cubes: raise Exception('Can not load cubes from {0}'.format(file)) for cube in raw_cubes: