diff --git a/dag/demography.yml b/dag/demography.yml
index d0e8e54c212..3acd9cf2449 100644
--- a/dag/demography.yml
+++ b/dag/demography.yml
@@ -287,6 +287,13 @@ steps:
data://grapher/demography/2024-12-17/efr_malani_jacob:
- data://garden/demography/2024-12-17/efr_malani_jacob
+ # Mean Age at childbirth (HFD + UN WPP)
+ data://garden/demography/2024-12-18/mean_age_childbearing:
+ - data://garden/un/2024-07-12/un_wpp
+ - data://garden/hmd/2024-11-19/hfd
+ data://grapher/demography/2024-12-18/mean_age_childbearing:
+ - data://garden/demography/2024-12-18/mean_age_childbearing
+
########################################################################
# OTHERS
########################################################################
diff --git a/etl/config.py b/etl/config.py
index c41382cd8e5..25d03772b24 100644
--- a/etl/config.py
+++ b/etl/config.py
@@ -44,6 +44,7 @@ def _normalise_branch(branch_name):
return re.sub(r"[\/\._]", "-", branch_name)
+# NOTE: If you edit this function, make sure to update `get_container_name` in ops repo as well
def get_container_name(branch_name):
normalized_branch = _normalise_branch(branch_name)
@@ -51,7 +52,22 @@ def get_container_name(branch_name):
normalized_branch = normalized_branch.replace("staging-site-", "")
# Ensure the container name is less than 63 characters
- container_name = f"staging-site-{normalized_branch[:50]}"
+ # however, we truncate it to 28 characters to be consistent with Cloudflare's
+ # 28 character limit (see https://community.cloudflare.com/t/algorithm-to-generate-a-preview-dns-subdomain-from-a-branch-name/477633)
+ # TODO: these ifs were added to be backward compatible with existing branches that are longer than 28 characters
+ # remove them once they get merged
+ if normalized_branch in (
+ "variable-selector-catalog-path",
+ "grapher-page-dynamic-thumbnail",
+ "data-fertility-rate-effective",
+ "add-reset-metadata-origin-option",
+ "data-battery-cell-prices-private",
+ ):
+ limit = 50
+ else:
+ limit = 28
+
+ container_name = f"staging-site-{normalized_branch[:limit]}"
# Remove trailing hyphens
return container_name.rstrip("-")
diff --git a/etl/steps/data/garden/demography/2024-12-18/mean_age_childbearing.meta.yml b/etl/steps/data/garden/demography/2024-12-18/mean_age_childbearing.meta.yml
new file mode 100644
index 00000000000..7246ba6584c
--- /dev/null
+++ b/etl/steps/data/garden/demography/2024-12-18/mean_age_childbearing.meta.yml
@@ -0,0 +1,51 @@
+# NOTE: To learn more about the fields, hover over their names.
+definitions:
+ common:
+ presentation:
+ topic_tags:
+ - Fertility Rate
+
+# Learn more about the available fields:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+dataset:
+ update_period_days: 365
+ title: Mean age at childbearing (HFD; UN WPP)
+
+tables:
+ mean_age_childbearing:
+ variables:
+ mean_age_childbearing:
+ title: Mean age at childbearing
+ unit: years
+ description_short: &cb_description_short |-
+ Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year. UN Medium projections for 2024-2100.
+ description_processing: |-
+ This indicator is constructed by combining data from multiple sources:
+
+ - Before 1950: Historical estimates by Human Fertility Database (2024).
+
+ - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
+
+ - 2024-2100: Projections based on Medium variant by the UN World Population Prospects (2024 revision).
+ presentation:
+ title_public: |-
+ Mean age at childbearing
+ grapher_config:
+ subtitle: *cb_description_short
+
+ mean_age_childbearing_hist:
+ title: Mean age at childbearing, historical
+ unit: years
+ description_short: &cb_description_short_hist |-
+ Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year.
+ description_processing: |-
+ This indicator is constructed by combining data from multiple sources:
+
+ - Before 1950: Historical estimates by Human Fertility Database (2024).
+
+ - 1950-2023: Population records by the UN World Population Prospects (2024 revision).
+ presentation:
+ title_public: |-
+ Mean age at childbearing
+ grapher_config:
+ subtitle: *cb_description_short_hist
diff --git a/etl/steps/data/garden/demography/2024-12-18/mean_age_childbearing.py b/etl/steps/data/garden/demography/2024-12-18/mean_age_childbearing.py
new file mode 100644
index 00000000000..bfd9c881fad
--- /dev/null
+++ b/etl/steps/data/garden/demography/2024-12-18/mean_age_childbearing.py
@@ -0,0 +1,69 @@
+"""Load the HFD and UN WPP garden datasets and create a combined garden dataset."""
+
+import pandas as pd
+from owid.catalog import processing as pr
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+# Year constants: HFD rows are kept only before YEAR_WPP_START; years after YEAR_WPP_PROJ_START are blanked in the "_hist" column.
+YEAR_WPP_START = 1950
+YEAR_WPP_PROJ_START = 2023
+# Table names (tables read from the UN WPP and HFD datasets, and the name of the output table)
+TABLE_NAME_WPP = "mean_age_childbearing"
+TABLE_NAME_HFD = "period"
+TABLE_NAME_NEW = "mean_age_childbearing"
+# Metric names (source columns in UN WPP and HFD, and the column name they are both renamed to)
+COLUMN_NAME_WPP = "mean_age_childbearing"
+COLUMN_NAME_HFD = "mab"
+COLUMN_NEW_NAME = "mean_age_childbearing"
+
+
+def run(dest_dir: str) -> None:
+ #
+ # Load inputs.
+ #
+ # Load the garden datasets this step depends on (HFD and UN WPP).
+ ds_hfd = paths.load_dataset("hfd")
+ ds_un = paths.load_dataset("un_wpp")
+
+ # Read the relevant table from each dataset.
+ tb_hfd = ds_hfd.read(TABLE_NAME_HFD)
+ tb_un = ds_un.read(TABLE_NAME_WPP)
+
+ # UN: keep estimates + medium variant, restricted to sex="all" and age="all".
+ tb_un = tb_un.loc[
+ (tb_un["sex"] == "all") & (tb_un["variant"].isin(["medium", "estimates"]) & (tb_un["age"] == "all")),
+ ["country", "year", COLUMN_NAME_WPP],
+ ].rename(columns={COLUMN_NAME_WPP: COLUMN_NEW_NAME})
+
+ # HFD: keep the `mab` column for birth_order=total, only for years before YEAR_WPP_START.
+ tb_hfd = tb_hfd.loc[
+ ((tb_hfd["birth_order"] == "total") & (tb_hfd["year"] < YEAR_WPP_START)), ["country", "year", COLUMN_NAME_HFD]
+ ].rename(columns={COLUMN_NAME_HFD: COLUMN_NEW_NAME})
+
+ # Concatenate the filtered HFD and UN WPP rows into a single table.
+ tb = pr.concat([tb_hfd, tb_un], ignore_index=True, short_name=TABLE_NAME_NEW)
+
+ # Add historical variant: same values, but set to NA for years after YEAR_WPP_PROJ_START.
+ tb[f"{COLUMN_NEW_NAME}_hist"] = tb[COLUMN_NEW_NAME].copy()
+ tb.loc[tb["year"] > YEAR_WPP_PROJ_START, f"{COLUMN_NEW_NAME}_hist"] = pd.NA
+
+ # Format
+ tb = tb.format(["country", "year"])
+
+ #
+ # Save outputs.
+ #
+ # Create a new garden dataset from the combined table.
+ ds_garden = create_dataset(
+ dest_dir,
+ tables=[tb],
+ check_variables_metadata=True,
+ )
+
+ # Save changes in the new garden dataset.
+ ds_garden.save()
diff --git a/etl/steps/data/garden/hmd/2024-11-19/hfd.py b/etl/steps/data/garden/hmd/2024-11-19/hfd.py
index 4b0b2212937..1a4c5624ed9 100644
--- a/etl/steps/data/garden/hmd/2024-11-19/hfd.py
+++ b/etl/steps/data/garden/hmd/2024-11-19/hfd.py
@@ -529,13 +529,17 @@ def add_shifted_to_cohort(tb):
}
)
- # TODO: move elsewhere
- # Build special table
- years = list(range(1925, tb_period_ages["year"].max() + 1, 5))
- tb_period_years = tb_period_ages.loc[
- tb_period_ages["year"].isin(years) & (tb_period_ages["birth_order"] == "total")
- ].drop(columns=["birth_order"])
+ # Special table: Distribution of period metrics
+ ## Keep only birth_order = total
+ tb_period_years = tb_period_ages.loc[(tb_period_ages["birth_order"] == "total")].drop(columns=["birth_order"])
+ ## Keep only years that are multiples of 5 (from year_min to year_max)
+ year_min = tb_period_years.loc[tb_period_years["year"] % 5 == 0, "year"].min()
+ year_max = tb_period_years["year"].max() + 1
+ years = list(range(year_min, year_max, 5))
+ tb_period_years = tb_period_years.loc[tb_period_years["year"].isin(years)]
+ ## Change age group names 12- -> 12, 55+ -> 55
tb_period_years["age"] = tb_period_years["age"].str.replace("-", "").str.replace("+", "").astype("UInt8")
+ ## HOTFIX: Name of the dimension
tb_period_years = tb_period_years.rename(
columns={
"year": "year_as_dimension",
@@ -579,14 +583,19 @@ def add_shifted_to_cohort(tb):
}
)
- # TODO: move elsewhere
- # Build special table
- years = list(range(1925, tb_cohort_ages["cohort"].max() + 1, 5))
- tb_cohort_years = tb_cohort_ages.loc[
- tb_cohort_ages["cohort"].isin(years) & (tb_cohort_ages["birth_order"] == "total")
- ].drop(columns=["birth_order"])
+ # Special table: Distribution of cohort metrics
+ ## Keep only birth_order = total
+ tb_cohort_years = tb_cohort_ages.loc[(tb_cohort_ages["birth_order"] == "total")].drop(columns=["birth_order"])
+ ## Keep only cohorts that are multiples of 5 (from year_min to year_max)
+ year_min = tb_cohort_years.loc[tb_cohort_years["cohort"] % 5 == 0, "cohort"].min()
+ year_max = tb_cohort_years["cohort"].max() + 1
+ years = list(range(year_min, year_max, 5))
+ tb_cohort_years = tb_cohort_years.loc[tb_cohort_years["cohort"].isin(years)]
+ ## Change age group names 12- -> 12, 55+ -> 55
tb_cohort_years["age"] = tb_cohort_years["age"].str.replace("-", "").str.replace("+", "").astype("UInt8")
- # Fix 12- vs 12, 55+ vs 55 etc.
+ # 'asfr_cohort' and 'ccfr_cohort' don't always use the same names for the same age groups. E.g. 12- vs 12, 55+ vs 55 etc.
+ # Therefore, these age groups are not aligned after the merge. We fix this by grouping + averaging.
+ # The following check ensures that this is actually the case, so that the groupby.mean makes sense!
assert tb_cohort_years.groupby(["country", "cohort", "age"])["asfr_cohort"].nunique().max() == 1
assert tb_cohort_years.groupby(["country", "cohort", "age"])["ccfr_cohort"].nunique().max() == 1
tb_cohort_years = tb_cohort_years.groupby(["country", "cohort", "age"], as_index=False).mean()
diff --git a/etl/steps/data/garden/un/2024-07-12/un_wpp.meta.yml b/etl/steps/data/garden/un/2024-07-12/un_wpp.meta.yml
index 3d59ea4fcb8..b5691c8d869 100644
--- a/etl/steps/data/garden/un/2024-07-12/un_wpp.meta.yml
+++ b/etl/steps/data/garden/un/2024-07-12/un_wpp.meta.yml
@@ -1,8 +1,7 @@
definitions:
global:
- projections:
- <%- if (variant is defined) and (variant != 'estimates') -%>
- Projections from 2024 onwards are based on the UN's << variant >> scenario.
+ projections: <%- if (variant is defined) and (variant != 'estimates') -%>
+ Projections from 2024 onwards are based on the UN's << variant >> scenario.
<%- endif -%>
dimensions:
title: |-
@@ -65,13 +64,10 @@ definitions:
subtitle: "{definitions.global.projections}"
originUrl: "https://ourworldindata.org/population-growth"
-
# this metadata file is not used in garden step, but in grapher step
tables:
population:
-
variables:
-
population:
title: Population
unit: people
@@ -83,7 +79,6 @@ tables:
grapher_config:
note: "Values as of 1 July of the indicated year."
-
population_change:
title: Population change
unit: people
@@ -153,7 +148,6 @@ tables:
subtitle: |-
The natural growth rate is the population change determined by births and deaths. Migration flows are not taken into account. {definitions.global.projections}
-
fertility_rate:
variables:
fertility_rate:
@@ -232,8 +226,7 @@ tables:
title_public: |-
Death rate, {definitions.global.dimensions.title}
grapher_config:
- subtitle:
- The number of deaths occurring during the year, per 1,000 people. {definitions.global.projections}
+ subtitle: The number of deaths occurring during the year, per 1,000 people. {definitions.global.projections}
births:
variables:
@@ -273,18 +266,18 @@ tables:
subtitle: |-
The median age divides the population into two parts of equal size; that is, there are as many people with ages above the median age as there are with ages below. {definitions.global.projections}
- # childbearing_age:
- # variables:
- # childbearing_age:
- # title: Mean age at childbearing
- # unit: years
- # description_short: &cb_description_short |-
- # Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year. {definitions.global.dimensions.description_short}
- # presentation:
- # title_public: |-
- # Mean age at childbearing, {definitions.global.dimensions.title}
- # grapher_config:
- # subtitle: *cb_description_short
+ mean_age_childbearing:
+ variables:
+ mean_age_childbearing:
+ title: Mean age at childbearing
+ unit: years
+ description_short: &cb_description_short |-
+ Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year. {definitions.global.dimensions.description_short}
+ presentation:
+ title_public: |-
+ Mean age at childbearing, {definitions.global.dimensions.title}
+ grapher_config:
+ subtitle: *cb_description_short
life_expectancy:
variables:
@@ -384,7 +377,6 @@ tables:
Old-age dependency ratio
<%-endif -%>, {definitions.global.dimensions.title}
-
mortality_rate:
variables:
mortality_rate:
diff --git a/etl/steps/data/garden/un/2024-07-12/un_wpp.py b/etl/steps/data/garden/un/2024-07-12/un_wpp.py
index 4911829de96..446ab058540 100644
--- a/etl/steps/data/garden/un/2024-07-12/un_wpp.py
+++ b/etl/steps/data/garden/un/2024-07-12/un_wpp.py
@@ -41,7 +41,7 @@ def run(dest_dir: str) -> None:
tb_median_age = ds_meadow["median_age"].reset_index()
tb_le = ds_meadow["life_expectancy"].reset_index()
tb_mortality = ds_meadow["mortality_rate"].reset_index()
- # tb_childbearing_age = ds_meadow["childbearing_age"].reset_index()
+ tb_childbearing_age = ds_meadow["mean_age_childbearing"].reset_index()
#
# Process data.
@@ -107,6 +107,11 @@ def run(dest_dir: str) -> None:
tb_mortality = set_variant_to_estimates(tb_mortality)
tb_mortality = tb_mortality.format(COLUMNS_INDEX)
+ ## Mean age at childbearing
+ tb_childbearing_age = process_standard(tb_childbearing_age)
+ tb_childbearing_age = set_variant_to_estimates(tb_childbearing_age)
+ tb_childbearing_age = tb_childbearing_age.format(COLUMNS_INDEX)
+
# Build tables list for dataset
tables = [
tb_population,
@@ -121,6 +126,7 @@ def run(dest_dir: str) -> None:
tb_sex_ratio,
tb_mortality,
tb_dependency,
+ tb_childbearing_age,
]
#
diff --git a/etl/steps/data/grapher/demography/2024-12-18/mean_age_childbearing.py b/etl/steps/data/grapher/demography/2024-12-18/mean_age_childbearing.py
new file mode 100644
index 00000000000..2182c1f0002
--- /dev/null
+++ b/etl/steps/data/grapher/demography/2024-12-18/mean_age_childbearing.py
@@ -0,0 +1,28 @@
+"""Load a garden dataset and create a grapher dataset."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+ #
+ # Load inputs.
+ #
+ # Load garden dataset.
+ ds_garden = paths.load_dataset("mean_age_childbearing")
+
+ # Read table from garden dataset, without resetting its index.
+ tb = ds_garden.read("mean_age_childbearing", reset_index=False)
+
+ #
+ # Save outputs.
+ #
+ # Create a new grapher dataset with the same metadata as the garden dataset.
+ ds_grapher = create_dataset(
+ dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_garden.metadata
+ )
+
+ # Save changes in the new grapher dataset.
+ ds_grapher.save()
diff --git a/etl/steps/data/grapher/un/2024-07-12/un_wpp.py b/etl/steps/data/grapher/un/2024-07-12/un_wpp.py
index 37155bf0734..dc975ac7346 100644
--- a/etl/steps/data/grapher/un/2024-07-12/un_wpp.py
+++ b/etl/steps/data/grapher/un/2024-07-12/un_wpp.py
@@ -26,7 +26,7 @@ def run(dest_dir: str) -> None:
ds_garden["sex_ratio"],
ds_garden["mortality_rate"],
ds_garden["dependency_ratio"],
- # ds_garden["childbearing_age"],
+ ds_garden["mean_age_childbearing"],
]
#
# Save outputs.
diff --git a/etl/steps/data/meadow/un/2024-07-12/un_wpp.py b/etl/steps/data/meadow/un/2024-07-12/un_wpp.py
index 547f11cb86b..fcd64eb62c8 100644
--- a/etl/steps/data/meadow/un/2024-07-12/un_wpp.py
+++ b/etl/steps/data/meadow/un/2024-07-12/un_wpp.py
@@ -84,6 +84,7 @@ def run(dest_dir: str) -> None:
tb_death_rate = clean_table_standard_xlsx(tb_main, "Crude Death Rate (deaths per 1,000 population)", "death_rate")
tb_birth_rate = clean_table_standard_xlsx(tb_main, "Crude Birth Rate (births per 1,000 population)", "birth_rate")
tb_median_age = clean_table_standard_xlsx(tb_main, "Median Age, as of 1 July (years)", "median_age")
+ tb_macb = clean_table_standard_xlsx(tb_main, "Mean Age Childbearing (years)", "mean_age_childbearing")
tb_mortality = make_tb_mortality(tb_main)
tb_le = make_tb_life_expectancy(tb_main)
@@ -115,7 +116,7 @@ def run(dest_dir: str) -> None:
tb_median_age,
tb_le,
tb_mortality,
- # tb_childbearing_age,
+ tb_macb,
# tb_population_doubling,
]
# Create a new meadow dataset with the same metadata as the snapshot.
diff --git a/lib/catalog/owid/catalog/datasets.py b/lib/catalog/owid/catalog/datasets.py
index 4c2452ab7a5..a1eba76cff1 100644
--- a/lib/catalog/owid/catalog/datasets.py
+++ b/lib/catalog/owid/catalog/datasets.py
@@ -21,7 +21,7 @@
from owid.repack import to_safe_types
from . import tables, utils
-from .meta import SOURCE_EXISTS_OPTIONS, DatasetMeta, TableMeta
+from .meta import SOURCE_EXISTS_OPTIONS, DatasetMeta, TableMeta, VariableMeta
from .processing_log import disable_processing_log
from .properties import metadata_property
@@ -155,7 +155,13 @@ def add(
table_filename = join(self.path, table.metadata.checked_name + f".{format}")
table.to(table_filename, repack=repack)
- def read(self, name: str, reset_index: bool = True, safe_types: bool = True) -> tables.Table:
+ def read(
+ self,
+ name: str,
+ reset_index: bool = True,
+ safe_types: bool = True,
+ reset_metadata: Literal["keep", "keep_origins", "reset"] = "keep",
+ ) -> tables.Table:
"""Read dataset's table from disk. Alternative to ds[table_name], but
with more options to optimize the reading.
@@ -163,6 +169,10 @@ def read(self, name: str, reset_index: bool = True, safe_types: bool = True) ->
large datasets with multi-indexes much faster.
:param safe_types: If true, convert numeric columns to Float64 and Int64 and categorical
columns to string[pyarrow]. This can significantly increase memory usage.
+ :param reset_metadata: Controls variable metadata reset behavior.
+ - "keep": Leave metadata unchanged (default).
+ - "keep_origins": Reset variable metadata but retain the 'origins' attribute.
+ - "reset": Reset all variable metadata.
"""
stem = self.path / Path(name)
@@ -173,6 +183,15 @@ def read(self, name: str, reset_index: bool = True, safe_types: bool = True) ->
t.metadata.dataset = self.metadata
if safe_types:
t = cast(tables.Table, to_safe_types(t))
+ if reset_metadata in ["keep_origins", "reset"]: # Handles "keep_origins" and "reset"
+ t.metadata = TableMeta()
+ for col in t.columns:
+ if reset_metadata == "keep_origins": # Preserve 'origins' attribute
+ origins = t[col].metadata.origins if hasattr(t[col].metadata, "origins") else None
+ t[col].metadata = VariableMeta()
+ t[col].metadata.origins = origins # Preserve 'origins' attribute
+ if reset_metadata == "reset": # Reset all metadata
+ t[col].metadata = VariableMeta()
return t
raise KeyError(f"Table `{name}` not found, available tables: {', '.join(self.table_names)}")
diff --git a/scripts/execution_times.ipynb b/scripts/execution_times.ipynb
new file mode 100644
index 00000000000..08f4fb28038
--- /dev/null
+++ b/scripts/execution_times.ipynb
@@ -0,0 +1,407 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Load execution times"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2m2024-12-19 10:33:58\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mfile.downloaded \u001b[0m \u001b[36mpath\u001b[0m=\u001b[35mPosixPath('/var/folders/0s/2yqr44dj44zcmyzdrf8fvxyc0000gn/T/tmpxyijvoya')\u001b[0m \u001b[36mtarget\u001b[0m=\u001b[35mowid@etl-prod-2:~/etl/.execution_time.json\u001b[0m\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(4659,)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from etl import files\n",
+ "import tempfile\n",
+ "import json\n",
+ "from pathlib import Path\n",
+ "\n",
+ "\n",
+ "with tempfile.NamedTemporaryFile(delete=False) as temp_file:\n",
+ " files.download_file_from_server(Path(temp_file.name), f\"owid@etl-prod-2:~/etl/.execution_time.json\")\n",
+ " temp_file_path = temp_file.name\n",
+ "\n",
+ "with open(temp_file_path, 'r') as f:\n",
+ " data = json.load(f)\n",
+ "\n",
+ "data = pd.Series(data)\n",
+ "data.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Longest steps"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " grapher://grapher/who/2021-07-01/ghe | \n",
+ " 3547 | \n",
+ "
\n",
+ " \n",
+ " data-private://garden/ihme_gbd/2024-05-20/gbd_prevalence | \n",
+ " 1625 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/ihme_gbd/2019/gbd_prevalence | \n",
+ " 1328 | \n",
+ "
\n",
+ " \n",
+ " data://garden/un/2022-07-11/un_wpp | \n",
+ " 1237 | \n",
+ "
\n",
+ " \n",
+ " data://garden/faostat/2024-03-14/faostat_fbsc | \n",
+ " 1224 | \n",
+ "
\n",
+ " \n",
+ " data://meadow/un/2022-07-11/un_wpp | \n",
+ " 1191 | \n",
+ "
\n",
+ " \n",
+ " data-private://garden/ihme_gbd/2024-05-20/gbd_cause | \n",
+ " 1090 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/ihme_gbd/2019/gbd_cause | \n",
+ " 970 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/ihme_gbd/2024-05-20/gbd_risk | \n",
+ " 934 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/ihme_gbd/2024-05-20/gbd_prevalence | \n",
+ " 850 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/un/2024-08-27/un_sdg | \n",
+ " 836 | \n",
+ "
\n",
+ " \n",
+ " data://garden/who/2024-07-30/ghe | \n",
+ " 832 | \n",
+ "
\n",
+ " \n",
+ " data://garden/who/2022-09-30/ghe | \n",
+ " 798 | \n",
+ "
\n",
+ " \n",
+ " data://meadow/demography/2024-12-06/wittgenstein_human_capital_historical | \n",
+ " 768 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/ihme_gbd/2024-05-20/gbd_child_mortality | \n",
+ " 741 | \n",
+ "
\n",
+ " \n",
+ " data-private://meadow/ihme_gbd/2024-05-20/gbd_cause | \n",
+ " 731 | \n",
+ "
\n",
+ " \n",
+ " data://meadow/faostat/2024-03-14/faostat_ti | \n",
+ " 632 | \n",
+ "
\n",
+ " \n",
+ " data://meadow/faostat/2024-03-14/faostat_tcl | \n",
+ " 611 | \n",
+ "
\n",
+ " \n",
+ " data-private://garden/ihme_gbd/2024-05-20/gbd_risk | \n",
+ " 601 | \n",
+ "
\n",
+ " \n",
+ " grapher://grapher/ihme_gbd/2024-05-20/gbd_cause | \n",
+ " 583 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.sort_values(ascending=False).head(20).to_frame('t').style.format('{:.0f}').bar(color='orange')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Time per step type & channel"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " t | \n",
+ "
\n",
+ " \n",
+ " channel | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " grapher://grapher | \n",
+ " 21617 | \n",
+ "
\n",
+ " \n",
+ " data://garden | \n",
+ " 19388 | \n",
+ "
\n",
+ " \n",
+ " data://meadow | \n",
+ " 14428 | \n",
+ "
\n",
+ " \n",
+ " data://grapher | \n",
+ " 4956 | \n",
+ "
\n",
+ " \n",
+ " backport://backport | \n",
+ " 1518 | \n",
+ "
\n",
+ " \n",
+ " data://explorers | \n",
+ " 1011 | \n",
+ "
\n",
+ " \n",
+ " data://open_numbers | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " export://multidim | \n",
+ " 15 | \n",
+ "
\n",
+ " \n",
+ " data://external | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " data://examples | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " export://github | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " github://open-numbers | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = data.to_frame('t').reset_index().rename(columns={'index': 'step'})\n",
+ "df['channel'] = df.step.map(lambda s: '/'.join(s.split('/')[:3])).str.replace('-private', '')\n",
+ "df = df[~df['channel'].str.contains('snapshot://')]\n",
+ "df.groupby('channel').t.sum().sort_values(ascending=False).to_frame('t').style.format('{:.0f}').bar(color='orange')"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/snapshots/covid/latest/cases_deaths.csv.dvc b/snapshots/covid/latest/cases_deaths.csv.dvc
index 08413b00dcf..3d7b64be800 100644
--- a/snapshots/covid/latest/cases_deaths.csv.dvc
+++ b/snapshots/covid/latest/cases_deaths.csv.dvc
@@ -22,7 +22,7 @@ meta:
version_producer: WHO COVID-19 Dashboard - Daily cases and deaths
url_main: https://covid19.who.int/
url_download: https://srhdpeuwpubsa.blob.core.windows.net/whdh/COVID/WHO-COVID-19-global-daily-data.csv
- date_accessed: 2024-12-18
+ date_accessed: 2024-12-19
date_published: '2024-07-07'
license:
name: CC BY 4.0
diff --git a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc
index 9b02ca78871..07be27e9feb 100644
--- a/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc
+++ b/snapshots/excess_mortality/latest/hmd_stmf.csv.dvc
@@ -13,7 +13,7 @@ meta:
HMD provides an online STMF visualization toolkit (https://mpidr.shinyapps.io/stmortality).
url: https://www.mortality.org/Data/STMF
source_data_url: https://www.mortality.org/File/GetDocument/Public/STMF/Outputs/stmf.csv
- date_accessed: 2024-12-18
+ date_accessed: 2024-12-19
publication_date: 2024-12-16
publication_year: 2024
published_by: |-
diff --git a/snapshots/excess_mortality/latest/wmd.csv.dvc b/snapshots/excess_mortality/latest/wmd.csv.dvc
index b04b74352b8..28610b10cbc 100644
--- a/snapshots/excess_mortality/latest/wmd.csv.dvc
+++ b/snapshots/excess_mortality/latest/wmd.csv.dvc
@@ -13,7 +13,7 @@ meta:
Published paper available at https://elifesciences.org/articles/69336.
url: https://github.com/akarlinsky/world_mortality/
source_data_url: https://raw.githubusercontent.com/akarlinsky/world_mortality/main/world_mortality.csv
- date_accessed: 2024-12-18
+ date_accessed: 2024-12-19
publication_date: '2021-06-30'
publication_year: 2021
published_by: |-
diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc
index 9edd13b8157..caede9a1d18 100644
--- a/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc
+++ b/snapshots/excess_mortality/latest/xm_karlinsky_kobak.csv.dvc
@@ -7,7 +7,7 @@ meta:
For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic.
url: https://github.com/dkobak/excess-mortality
source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-per-year.csv
- date_accessed: 2024-12-18
+ date_accessed: 2024-12-19
publication_date: '2021-06-30'
publication_year: 2021
published_by: |-
diff --git a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc
index 588a61a1ddd..eccdea10d23 100644
--- a/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc
+++ b/snapshots/excess_mortality/latest/xm_karlinsky_kobak_ages.csv.dvc
@@ -6,7 +6,7 @@ meta:
For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic.
url: https://github.com/dkobak/excess-mortality
source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-stmf.csv
- date_accessed: 2024-12-18
+ date_accessed: 2024-12-19
publication_date: '2021-06-30'
publication_year: 2021
published_by: |-
diff --git a/snapshots/who/latest/fluid.csv.dvc b/snapshots/who/latest/fluid.csv.dvc
index 9009202f688..0d9ee8b1b5f 100644
--- a/snapshots/who/latest/fluid.csv.dvc
+++ b/snapshots/who/latest/fluid.csv.dvc
@@ -16,6 +16,6 @@ meta:
The platform accommodates both qualitative and quantitative data which facilitates the tracking of global trends, spread, intensity, and impact of influenza. These data are made freely available to health policy makers in order to assist them in making informed decisions regarding the management of influenza.
wdir: ../../../data/snapshots/who/latest
outs:
- - md5: 10e879a731adb05be7cc0e00705cbcde
- size: 168441072
+ - md5: d1de585937e948804d24ca43c6b9b9ca
+ size: 168443277
path: fluid.csv
diff --git a/snapshots/who/latest/flunet.csv.dvc b/snapshots/who/latest/flunet.csv.dvc
index 3961c7a5727..e47ba0c6d94 100644
--- a/snapshots/who/latest/flunet.csv.dvc
+++ b/snapshots/who/latest/flunet.csv.dvc
@@ -16,6 +16,6 @@ meta:
The data are provided remotely by National Influenza Centres (NICs) of the Global Influenza Surveillance and Response System (GISRS) and other national influenza reference laboratories collaborating actively with GISRS, or are uploaded from WHO regional databases.
wdir: ../../../data/snapshots/who/latest
outs:
- - md5: 758e11c75557b09a39a774ea460ea308
- size: 27260626
+ - md5: 24f1ed44cab948430fcb290eaf9ebe21
+ size: 27263705
path: flunet.csv
diff --git a/snapshots/who/latest/monkeypox.csv.dvc b/snapshots/who/latest/monkeypox.csv.dvc
index 43439fe9ce3..3af85c68f7e 100644
--- a/snapshots/who/latest/monkeypox.csv.dvc
+++ b/snapshots/who/latest/monkeypox.csv.dvc
@@ -24,6 +24,6 @@ meta:
name: CC BY 4.0
outs:
- - md5: 55ca84faa2028764bbde4a308f466cd8
- size: 603658
+ - md5: 6fcd38511d268c888864f3f0760026a0
+ size: 615661
path: monkeypox.csv