diff --git a/src/pymorize/controlled_vocabularies.py b/src/pymorize/controlled_vocabularies.py
index 35f2c161..4587e46d 100644
--- a/src/pymorize/controlled_vocabularies.py
+++ b/src/pymorize/controlled_vocabularies.py
@@ -5,6 +5,8 @@
 import glob
 import json
 import os
+import re
+import requests
 
 
 class ControlledVocabularies(dict):
@@ -66,3 +68,53 @@ def dict_from_json_file(path):
                 return json.load(file)
         except json.JSONDecodeError as e:
             raise ValueError(f"file {path}: {e.msg}")
+
+    @classmethod
+    def load_from_git(cls, tag: str = "6.2.58.73"):
+        """Load the controlled vocabularies from the CMIP6_CVs GitHub repository.
+
+        Parameters
+        ----------
+        tag : str
+            The git tag to use. Default is 6.2.58.73
+            If tag is None, the main branch is used.
+
+        Returns
+        -------
+        ControlledVocabularies
+            A new ControlledVocabularies object, behaves like a dictionary.
+        """
+        # raw.githubusercontent.com addresses files by git ref; "blob/<tag>" is the
+        # github.com web-URL form and is not a ref, so tags are spelled "refs/tags/<tag>".
+        ref = "refs/heads/main" if tag is None else f"refs/tags/{tag}"
+        url = f"https://raw.githubusercontent.com/WCRP-CMIP/CMIP6_CVs/{ref}"
+        filenames = (
+            "CMIP6_DRS.json",
+            "CMIP6_activity_id.json",
+            "CMIP6_experiment_id.json",
+            "CMIP6_frequency.json",
+            "CMIP6_grid_label.json",
+            "CMIP6_institution_id.json",
+            "CMIP6_license.json",
+            "CMIP6_nominal_resolution.json",
+            "CMIP6_realm.json",
+            "CMIP6_required_global_attributes.json",
+            "CMIP6_source_id.json",
+            "CMIP6_source_type.json",
+            "CMIP6_sub_experiment_id.json",
+            "CMIP6_table_id.json",
+            "mip_era.json",
+        )
+        name_pattern = re.compile(r"^(?:CMIP6_)?(?P<name>[^\.]+)\.json$").match
+        data = {}
+        for fname in filenames:
+            # e.g. "CMIP6_source_id.json" -> "source_id", also the key looked up in the file
+            name = name_pattern(fname).group("name")
+            # A timeout keeps a stalled connection from hanging the caller forever.
+            r = requests.get(f"{url}/{fname}", timeout=30)
+            r.raise_for_status()
+            content = json.loads(r.content.decode())
+            data[name] = content.get(name)
+        obj = cls([])
+        obj.update(data)
+        return obj
diff --git a/src/pymorize/global_attributes.py b/src/pymorize/global_attributes.py
new file mode 100644
index 00000000..4a6a9b58
--- /dev/null
+++ b/src/pymorize/global_attributes.py
@@ -0,0 +1,186 @@
+# global_attributes.py
+
+import json
+import re
+from pathlib import Path
+
+from .controlled_vocabularies import ControlledVocabularies
+
+# from loguru import logger
+
+cv = ControlledVocabularies.load_from_git()  # NOTE: network download at import time
+
+required_global_attributes = cv["required_global_attributes"]
+
+_parent_fields = (  # parent-experiment attribute names; not used elsewhere in this module
+    "branch_method",
+    "branch_time_in_child",
+    "branch_time_in_parent",
+    "parent_experiment_id",
+    "parent_mip_era",
+    "parent_source_id",
+    "parent_time_units",
+    "parent_variant_label",
+)
+
+# Fallback values that _update_license looks up when an argument is None or empty.
+defaults = {
+    "institution_id": "AWI",
+    "license_type": "CC BY-SA 4.0",
+    "maintainer_url": None,
+}
+
+
+def set_global_attributes(ds, rule):
+    """Derive the global attributes for `rule` and store them in `ds.attrs`.
+
+    Returns `ds`. Raises ValueError if institution_id/activity_id cannot be resolved.
+    """
+    gattrs = {}
+    _update_variant_label(rule.get("variant_label"), gattrs)
+    gattrs["variable_id"] = rule.data_request_variable.variable_id
+    gattrs["table_id"] = rule.data_request_variable.table.table_id
+    _update_global_attributes_from_table_header(gattrs, rule)
+    gattrs["source_id"] = source_id = rule.get("source_id")
+    source_id_cv = cv["source_id"][source_id]
+    _institution_id = source_id_cv.get("institution_id")
+    if len(_institution_id) > 1:
+        # Several institutions share this source_id, so the rule must name one of them.
+        institution_id = rule.get("institution_id")
+        if institution_id not in _institution_id:
+            raise ValueError(
+                f"institution_id must be one of {', '.join(_institution_id)} "
+                f"for source_id {source_id}, got {institution_id!r}."
+            )
+    else:
+        institution_id = _institution_id[0]
+    gattrs["institution_id"] = institution_id
+    license_type = source_id_cv["license_info"]["id"]
+    further_info_url = rule.get("further_info_url")
+    _update_license(gattrs, cv, institution_id, license_type, further_info_url)
+    gattrs["source"] = source = rule.get("source")  # model_component
+    model_component = source_id_cv["model_component"][source]
+    gattrs["grid"] = model_component["description"]
+    gattrs["nominal_resolution"] = model_component["native_nominal_resolution"]
+    gattrs["source_type"] = rule.get("source_type")
+    experiment_id = rule.get("experiment_id")
+    # Needed further down even when the rule supplies activity_id, so look it up first.
+    _experiment_id_cv = cv.get("experiment_id", {}).get(experiment_id, {})
+    activity_id = rule.get("activity_id", None)
+    if activity_id is None:
+        activity_ids = _experiment_id_cv.get("activity_id", [])
+        if not activity_ids:
+            raise ValueError(f"no activity_id found for experiment_id {experiment_id}")
+        if len(activity_ids) > 1:
+            raise ValueError(
+                f"activity_id -- {', '.join(activity_ids)} -- has multiple value for experiment_id {experiment_id}."
+            )
+        activity_id = activity_ids[0]
+    gattrs["activity_id"] = activity_id
+    gattrs["experiment"] = _experiment_id_cv.get("experiment", "")
+    gattrs["experiment_id"] = experiment_id
+    # ignore parent_experiment_id for now, in the first iteration
+    gattrs["sub_experiment"] = rule.get("sub_experiment", "")
+    gattrs["sub_experiment_id"] = _experiment_id_cv.get("sub_experiment_id")
+    ds.attrs.update(gattrs)
+    return ds
+
+
+def _update_global_attributes_from_table_header(gattrs, rule):
+    """Copy selected entries of the rule's table header into `gattrs`.
+
+    `gattrs` is updated in place; nothing is returned.
+    """
+    header = rule.data_request_variable.table._data["Header"]
+    copied_keys = ("data_specs_version", "Conventions", "mip_era", "realm", "product")
+    for key in copied_keys:
+        gattrs[key] = header[key]
+
+
+def _parse_variant_label(label: str) -> dict:
+    """Extract the integer indices from a variant label.
+
+    `label` must have exactly the form "r<int>i<int>p<int>f<int>".
+    Example: "r1i1p1f1"
+
+    Returns a dict mapping realization_index, initialization_index,
+    physics_index and forcing_index to ints.
+    Raises ValueError for any other input, including None and non-strings.
+    """
+    pattern = re.compile(
+        r"r(?P<realization_index>\d+)"
+        r"i(?P<initialization_index>\d+)"
+        r"p(?P<physics_index>\d+)"
+        r"f(?P<forcing_index>\d+)"
+    )
+    # fullmatch() is stricter than match() with "$", which accepts a trailing newline.
+    d = pattern.fullmatch(label) if isinstance(label, str) else None
+    if d is None:
+        raise ValueError(
+            f"`label` must be of the form 'r<int>i<int>p<int>f<int>', Got: {label}"
+        )
+    return {name: int(val) for name, val in d.groupdict().items()}
+
+
+def _update_variant_label(label: str, gattrs: dict) -> dict:
+    """Write `label` and the indices parsed from it into `gattrs`; return `gattrs`."""
+    # Parsing happens first, so an invalid label raises before `gattrs` is modified.
+    gattrs.update(_parse_variant_label(label))
+    gattrs["variant_label"] = label
+    return gattrs
+
+
+def _update_license(
+    gattrs: dict,
+    cv: dict,
+    institution_id: str = None,
+    license_type: str = None,
+    further_info_url: str = None,
+):
+    """
+    Updates the license attribute in the global attributes dictionary.
+
+    The template ``cv["license"]["license"]`` marks values to fill in as ``<...>``
+    and encloses the optional maintainer-URL clause in ``[...]``.
+
+    Args:
+        gattrs (dict): The global attributes dictionary to update.
+        cv (dict): The controlled vocabulary dictionary.
+        institution_id (str, optional): Key into ``cv["institution_id"]``.
+            Defaults to None, meaning ``defaults["institution_id"]``.
+        license_type (str, optional): Key into the CV's ``license_options``.
+            Defaults to None, meaning ``defaults["license_type"]``.
+        further_info_url (str, optional): The maintainer URL. Defaults to None;
+            if `defaults` has no value either, the optional clause is removed.
+
+    Raises:
+        KeyError: If the institution or the license type is missing from `cv`.
+    """
+    import logging
+    # `logger` is not defined at module level (the loguru import is commented out).
+    logger = logging.getLogger(__name__)
+    institution_id = institution_id or defaults.get("institution_id")
+    license_type = license_type or defaults.get("license_type")
+    further_info_url = further_info_url or defaults.get("further_info_url")
+    logger.debug(f"{institution_id=}")
+    logger.debug(f"{license_type=}")
+    logger.debug(f"{further_info_url=}")
+    lic = cv["license"]
+    option = lic["license_options"][license_type]
+    institution = cv["institution_id"][institution_id]
+    values = [institution, option["license_id"], option["license_url"]]
+    if further_info_url is None:
+        logger.debug(
+            "Removing placeholder for maintainer url from license text as it is not provided."
+        )
+        license_text = re.sub(r"\[.*?\]", "", lic["license"])
+    else:
+        # Keep the optional clause, but drop the brackets that only mark it as optional.
+        license_text = re.sub(r"\[(.*?)\]", r"\1", lic["license"])
+        values.append(further_info_url)
+    logger.debug(
+        "Creating place-holders in license template found in CMIP6_license.json"
+    )
+    text = re.sub(r"<.*?>", "{}", license_text).format(*values)
+    logger.debug(f"License: {text}")
+    gattrs["license"] = text
diff --git a/src/pymorize/global_attributes_checklist.org b/src/pymorize/global_attributes_checklist.org
new file mode 100644
index 00000000..42be7d41
--- /dev/null
+++ b/src/pymorize/global_attributes_checklist.org
@@ -0,0 +1,86 @@
+* GLOBAL ATTRIBUTES
+
+reference CMIP6_required_global_attributes.json
+
+| index | NAME                 | IMPLEMENTED | SOURCE        | EXAMPLE                                           |
+|-------+----------------------+-------------+---------------+---------------------------------------------------|
+|     1 | activity_id          | x           | USER          | CMIP                                              |
+|     2 | Conventions          | x           | table         | CF-1.7 CMIP-6.2                                   |
+|     3 | creation_date        |             |               | 2018-12-18T12:00:00Z                              |
+|     4 | data_specs_version   | x           | USER / table  | 01.00.27                                          |
+|     5 | experiment           |             |               | piControl                                         |
+|     6 | experiment_id        |             |               | piControl                                         |
+|     7 | forcing_index        | x           | derived from  | 1                                                 |
+|       |                      |             | variant_label |                                                   |
+|     8 | frequency            | x           | table         | mon                                               |
+|     9 | further_info_url     | x           | USER          | *too_long_to_list_here                            |
+|       |                      |             | (optional,    |                                                   |
+|       |                      |             | default:      |                                                   |
+|       |                      |             | None)         |                                                   |
+|    10 | grid                 |             |               | *too_long_to_list_here                            |
+|    11 | grid_label           |             |               | gn                                                |
+|    12 | initialization_index | x           | derived from  | 1                                                 |
+|       |                      |             | variant_label |                                                   |
+|    13 | institution          |             |               | *too_long_to_list_here                            |
+|    14 | institution_id       | x           | using         | AWI                                               |
+|       |                      |             | default: AWI  |                                                   |
+|    15 | license              | x           | CV            | *too_long_to_list_here                            |
+|    16 | mip_era              | x           | table         | CMIP6                                             |
+|    17 | nominal_resolution   |             |               | 25 km                                             |
+|    18 | physics_index        | x           | derived from  | 1                                                 |
+|       |                      |             | variant_label |                                                   |
+|    19 | product              | x           | table         | model-output                                      |
+|    20 | realization_index    | x           | derived from  | 1                                                 |
+|       |                      |             | variant_label |                                                   |
+|    21 | realm                | x           | table         | ocean                                             |
+|    22 | source               |             |               | AWI-CM-1-1-MR                                     |
+|    23 | source_id            |             |               | AWI-CM-1-1-MR                                     |
+|    24 | source_type          |             |               | AOGCM                                             |
+|    25 | sub_experiment       |             |               | none                                              |
+|    26 | sub_experiment_id    |             |               | none                                              |
+|    27 | table_id             | x           | USER / use    | Omon                                              |
+|       |                      |             | all matching  |                                                   |
+|       |                      |             | tables        |                                                   |
+|    28 | tracking_id          |             |               | hdl:21.14100/84bfc093-b0a3-44ee-b733-91239b6fa6b2 |
+|    29 | variable_id          | x           | USER          | fgco2                                             |
+|    30 | variant_label        | x           | USER          | r1i1p1f1                                          |
+
+
+
+EXAMPLE
+  - further_info_url: "https://furtherinfo.es-doc.org/CMIP6.AWI.AWI-CM-1-1-MR.piControl.none.r1i1p1f1" ;
+  - grid: "FESOM 1.4 (unstructured grid in the horizontal with 830305 wet nodes; 46 levels; top grid cell 0-5 m)"
+  - institution: "Alfred Wegener Institute, Helmholtz Centre for Polar and Marine Research, Am Handelshafen 12, 27570 Bremerhaven, Germany"
+  - license: "CMIP6 model data produced by Alfred Wegener Institute, Helmholtz
+               Centre for Polar and Marine Research, Am Handelshafen 12, 27570 Bremerhaven,
+               Germany is licensed under a Creative Commons Attribution-ShareAlike 4.0
+               International License (https://creativecommons.org/licenses/). Consult
+               https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6
+               output, including citation requirements and proper acknowledgment. Further
+               information about this data, including some limitations, can be found via
+               the further_info_url (recorded as a global attribute in this file). The data
+               producers and data providers make no warranty, either express or implied,
+               including, but not limited to, warranties of merchantability and fitness for
+               a particular purpose. All liabilities arising from the supply of the
+               information (including any liability arising in negligence) are excluded to
+               the fullest extent permitted by law."
+  - branch_method: "standard"
+  - branch_time_in_child: 0.
+  - branch_time_in_parent: 182622.
+  - parent_activity_id: "CMIP"
+  - parent_experiment_id: "piControl-spinup"
+  - parent_mip_era: "CMIP6"
+  - parent_source_id: "AWI-CM-1-1-MR"
+  - parent_time_units: "days since 1901-1-1"
+  - parent_variant_label: "r1i1p1f1"
+
+
+COMMENT
+  4. `data_specs_version`: At the moment, tables of one fixed version are used ("01.00.13", for instance).
+     Exposing this attribute to the user would mean fetching tables of a user-defined version from git.
+     This is currently not implemented.
+  27. `table_id`: [Optional] A CMOR_variable may be in more than one table.
+     If the user does not specify a table_id, then all tables matching this variable
+     are considered.
+
+
diff --git a/tests/configs/fesom_sample.yaml b/tests/configs/fesom_sample.yaml
index 630e0c8e..a989e551 100644
--- a/tests/configs/fesom_sample.yaml
+++ b/tests/configs/fesom_sample.yaml
@@ -52,3 +52,4 @@ rules:
     input_patterns:
       - /path/to/fesom/output/files/*_temp.nc
     output_file: temp.nc
+
diff --git a/tests/test_global_attributes.py b/tests/test_global_attributes.py
new file mode 100644
index 00000000..face8a3e
--- /dev/null
+++ b/tests/test_global_attributes.py
@@ -0,0 +1,140 @@
+import pytest
+import pymorize.global_attributes as ga
+from pymorize.global_attributes import _parse_variant_label as parse_variant_label
+from pymorize.global_attributes import _update_variant_label as update_variant_label
+
+simple_cases = [
+    # Each case: (variant label, indices expected from parsing it)
+    (
+        "r1i2p3f4",
+        {
+            "realization_index": 1,
+            "initialization_index": 2,
+            "physics_index": 3,
+            "forcing_index": 4,
+        },
+    ),
+    (
+        "r10i20p30f40",
+        {
+            "realization_index": 10,
+            "initialization_index": 20,
+            "physics_index": 30,
+            "forcing_index": 40,
+        },
+    ),
+    (
+        "r0i0p0f0",
+        {
+            "realization_index": 0,
+            "initialization_index": 0,
+            "physics_index": 0,
+            "forcing_index": 0,
+        },
+    ),
+]
+
+
+@pytest.mark.parametrize("label, expected", simple_cases)
+def test_parse_variant_label_realistic_labels(label, expected):
+    """Well-formed labels yield the four expected integer indices."""
+    assert parse_variant_label(label) == expected, f"Failed Test ID: {label}"
+
+
+edge_cases = [  # zero-padded numbers; the expected values carry no padding
+    (
+        "r01i02p03f04",
+        {
+            "realization_index": 1,
+            "initialization_index": 2,
+            "physics_index": 3,
+            "forcing_index": 4,
+        },
+    ),
+    (
+        "r001i0002p0003f0004",
+        {
+            "realization_index": 1,
+            "initialization_index": 2,
+            "physics_index": 3,
+            "forcing_index": 4,
+        },
+    ),
+]
+
+
+@pytest.mark.parametrize("label, expected", edge_cases)
+def test_parse_variant_label_edge_cases(label, expected):
+    """Zero-padded labels parse to the same integers as unpadded ones."""
+    assert parse_variant_label(label) == expected, f"Failed Test ID: {label}"
+
+
+error_cases = [  # each case: (label, pytest.raises context for the expected error)
+    ("r1i2p3", pytest.raises(ValueError)),
+    ("r1i2p3f", pytest.raises(ValueError)),
+    ("1i2p3f4", pytest.raises(ValueError)),
+    ("r1i2p3f4x", pytest.raises(ValueError)),
+    ("", pytest.raises(ValueError)),
+    (None, pytest.raises(ValueError)),
+    # negative indices are not supported. should they be?
+    ("r-1i-2p-3f-4", pytest.raises(ValueError)),
+    # strict match, no trailing extra characters
+    ("r1i2p3f4a0b1", pytest.raises(ValueError)),
+    # strict match, no leading extra characters
+    ("c2d2r1i2p3f4", pytest.raises(ValueError)),
+    # strict match, no leading or trailing extra characters
+    ("c2d2r1i2p3f4a0b1", pytest.raises(ValueError)),
+    # no spaces
+    ("r1 i2 p3 f4", pytest.raises(ValueError)),
+    ("r 1 i 2 p 3 f 4", pytest.raises(ValueError)),
+]
+
+
+@pytest.mark.parametrize("label, exception", error_cases)
+def test_parse_variant_label_error_cases(label, exception):
+    with exception:  # the pytest.raises(...) context paired with this label
+        parse_variant_label(label)
+
+
+def test_update_variant_label_adds_label_to_gattrs():
+    """The label itself ends up under the "variant_label" key of the passed dict."""
+    gattrs = {}
+    update_variant_label(label="r10i20p30f40", gattrs=gattrs)
+    assert "r10i20p30f40" == gattrs["variant_label"]
+
+
+def test_update_variant_label_overrides_existing_label():
+    """Values already stored in `gattrs` are replaced by those of the new label."""
+    gattrs = {
+        "realization_index": 1,
+        "initialization_index": 2,
+        "physics_index": 3,
+        "forcing_index": 4,
+        "variant_label": "r1i2p3f4",
+    }
+    new_label = "r10i20p30f40"
+    update_variant_label(label=new_label, gattrs=gattrs)
+    assert gattrs["variant_label"] == new_label
+    expected = {"realization": 10, "initialization": 20, "physics": 30, "forcing": 40}
+    for prefix, value in expected.items():
+        assert gattrs[f"{prefix}_index"] == value
+
+
+def test_update_license_with_no_extra_arguments():
+    """With only `gattrs` and `cv` given, the module defaults select AWI / CC BY-SA 4.0."""
+    license_options = {
+        "CC BY-SA 4.0": {
+            "license_id": "Creative Common",
+            "license_url": "https://cc.org",
+        }
+    }
+    template = "CMI6 model <Institution> under license <CC; license_id> License (<insert matching license>). further_info_url [and at <maintained by modeling group>]. yata yata yata."
+    cv = {
+        "institution_id": {"AWI": "AWI"},
+        "license": {"license": template, "license_options": license_options},
+    }
+    d = {}
+    ga._update_license(d, cv)
+    assert "license" in d
+    assert "Creative Common" in d["license"]
+    assert "AWI" in d["license"]
diff --git a/tests/unit/test_global_attributes.py b/tests/unit/test_global_attributes.py
new file mode 100644
index 00000000..f24722cb
--- /dev/null
+++ b/tests/unit/test_global_attributes.py
@@ -0,0 +1,54 @@
+import pytest
+import xarray as xr
+from pymorize.global_attributes import set_global_attributes
+required_attributes = {  # Name, expected pass
+    ("activity_id", True),
+    ("Conventions", True),
+    ("creation_date", True),
+    ("data_specs_version", True),
+    ("experiment", True),
+    ("experiment_id", True),
+    ("forcing_index", True),
+    ("frequency", True),
+    ("further_info_url", True),
+    ("grid", True),
+    ("grid_label", True),
+    ("initialization_index", True),
+    ("institution", True),
+    ("institution_id", True),
+    ("license", True),
+    ("mip_era", True),
+    ("nominal_resolution", True),
+    ("physics_index", True),
+    ("product", True),
+    ("realization_index", True),
+    ("realm", True),
+    ("source", True),
+    ("source_id", True),
+    ("source_type", True),
+    ("sub_experiment", True),
+    ("sub_experiment_id", True),
+    ("table_id", True),
+    ("tracking_id", False),
+    ("variable_id", True),
+    ("variant_label", True),
+}
+
+
+@pytest.mark.parametrize("added_attributes, expected_pass", required_attributes)
+def test_global_attributes_has_expected_attributes(
+    pi_uxarray_temp_rule, pi_uxarray_data, added_attributes, expected_pass
+):
+    """Each required attribute name must appear in the attrs of the returned dataset."""
+    # Attributes flagged False are reported as expected failures up front.
+    if not expected_pass:
+        pytest.xfail(f"Test should fail with attribute {added_attributes}")
+    # Only files whose name starts with "temp.fesom" are opened.
+    fesom_dir = pi_uxarray_data / "outdata/fesom/"
+    temp_files = [
+        path for path in fesom_dir.iterdir() if path.name.startswith("temp.fesom")
+    ]
+    ds = xr.open_mfdataset(temp_files, engine="h5netcdf")
+    # The function under test is expected to hand back an object exposing `.attrs`.
+    ds_out = set_global_attributes(ds, pi_uxarray_temp_rule)
+    assert added_attributes in ds_out.attrs