Skip to content

Commit

Permalink
Merge branch 'master' into data-fertility-rate-effective
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Dec 19, 2024
2 parents 68b61fb + 7ae6528 commit 212e752
Show file tree
Hide file tree
Showing 20 changed files with 658 additions and 53 deletions.
7 changes: 7 additions & 0 deletions dag/demography.yml
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,13 @@ steps:
data://grapher/demography/2024-12-17/efr_malani_jacob:
- data://garden/demography/2024-12-17/efr_malani_jacob

# Mean Age at childbirth (HFD + UN WPP)
data://garden/demography/2024-12-18/mean_age_childbearing:
- data://garden/un/2024-07-12/un_wpp
- data://garden/hmd/2024-11-19/hfd
data://grapher/demography/2024-12-18/mean_age_childbearing:
- data://garden/demography/2024-12-18/mean_age_childbearing

########################################################################
# OTHERS
########################################################################
Expand Down
18 changes: 17 additions & 1 deletion etl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,30 @@ def _normalise_branch(branch_name):
return re.sub(r"[\/\._]", "-", branch_name)


# NOTE: If you edit this function, make sure to update `get_container_name` in ops repo as well
def get_container_name(branch_name):
    """Build the staging container name for a git branch.

    The branch name is normalised with `_normalise_branch`, every
    "staging-site-" occurrence is removed, and the remainder is truncated and
    prefixed with "staging-site-" again. Trailing hyphens are stripped from
    the result.

    Args:
        branch_name: Name of the git branch.

    Returns:
        The container name, always starting with "staging-site".
    """
    normalized_branch = _normalise_branch(branch_name)

    # Strip staging-site- so that it is not duplicated when added back below.
    # NOTE: `str.replace` removes every occurrence, not only a leading prefix.
    normalized_branch = normalized_branch.replace("staging-site-", "")

    # We truncate the branch part to 28 characters to be consistent with Cloudflare's
    # 28 character limit (see https://community.cloudflare.com/t/algorithm-to-generate-a-preview-dns-subdomain-from-a-branch-name/477633)
    # TODO: this allowlist was added to be backward compatible with existing branches that are longer than 28 characters
    #   remove it once they get merged
    if normalized_branch in (
        "variable-selector-catalog-path",
        "grapher-page-dynamic-thumbnail",
        "data-fertility-rate-effective",
        "add-reset-metadata-origin-option",
        "data-battery-cell-prices-private",
    ):
        # Legacy limit: 13-character prefix + 50 keeps the name within 63 characters.
        limit = 50
    else:
        limit = 28

    container_name = f"staging-site-{normalized_branch[:limit]}"
    # Remove trailing hyphens (truncation may leave one at the end).
    return container_name.rstrip("-")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Fertility Rate

# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365
title: Mean age at childbearing (HFD; UN WPP)

tables:
mean_age_childbearing:
variables:
mean_age_childbearing:
title: Mean age at childbearing
unit: years
description_short: &cb_description_short |-
Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year. UN Medium projections for 2024-2100.
description_processing: |-
This indicator is constructed by combining data from multiple sources:
- Before 1950: Historical estimates by Human Fertility Database (2024).
- 1950-2023: Population records by the UN World Population Prospects (2024 revision).
- 2024-2100: Projections based on Medium variant by the UN World Population Prospects (2024 revision).
presentation:
title_public: |-
Mean age at childbearing
grapher_config:
subtitle: *cb_description_short

mean_age_childbearing_hist:
title: Mean age at childbearing, historical
unit: years
description_short: &cb_description_short_hist |-
Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year.
description_processing: |-
This indicator is constructed by combining data from multiple sources:
- Before 1950: Historical estimates by Human Fertility Database (2024).
- 1950-2023: Population records by the UN World Population Prospects (2024 revision).
presentation:
title_public: |-
Mean age at childbearing
grapher_config:
subtitle: *cb_description_short_hist
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Load the HFD and UN WPP garden datasets and create a garden dataset on mean age at childbearing."""

import pandas as pd
from owid.catalog import processing as pr

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


# Year constants
# HFD rows are kept only for years strictly before this value (see `run`).
YEAR_WPP_START = 1950
# Cutoff for the "_hist" column: rows with year > this value are set to NA there,
# i.e. this is the last year kept in the historical series (see `run`).
YEAR_WPP_PROJ_START = 2023
# Table names
# Table read from the "un_wpp" dataset.
TABLE_NAME_WPP = "mean_age_childbearing"
# Table read from the "hfd" dataset.
TABLE_NAME_HFD = "period"
# Short name given to the combined output table.
TABLE_NAME_NEW = "mean_age_childbearing"
# Metric names
# Column selected from the UN WPP table.
COLUMN_NAME_WPP = "mean_age_childbearing"
# Column selected from the HFD table.
COLUMN_NAME_HFD = "mab"
# Name both source columns are renamed to in the output table.
COLUMN_NEW_NAME = "mean_age_childbearing"


def run(dest_dir: str) -> None:
    """Combine the HFD and UN WPP series into a single table and save it as a dataset.

    Args:
        dest_dir: Destination directory passed on to `create_dataset`.
    """
    #
    # Load inputs.
    #
    # Load the two input datasets.
    ds_hfd = paths.load_dataset("hfd")
    ds_un = paths.load_dataset("un_wpp")

    # Read the relevant table from each dataset.
    tb_hfd = ds_hfd.read(TABLE_NAME_HFD)
    tb_un = ds_un.read(TABLE_NAME_WPP)

    #
    # Process data.
    #
    index_columns = ["country", "year"]
    hist_column = f"{COLUMN_NEW_NAME}_hist"

    # UN: keep both sexes, all ages, and the "estimates" + "medium" variants.
    un_is_all_sexes = tb_un["sex"] == "all"
    un_is_wanted_variant = tb_un["variant"].isin(["medium", "estimates"])
    un_is_all_ages = tb_un["age"] == "all"
    un_rows = un_is_all_sexes & (un_is_wanted_variant & un_is_all_ages)
    tb_un = tb_un.loc[un_rows, index_columns + [COLUMN_NAME_WPP]]
    tb_un = tb_un.rename(columns={COLUMN_NAME_WPP: COLUMN_NEW_NAME})

    # HFD: keep birth_order = total, and only years before YEAR_WPP_START.
    hfd_is_total = tb_hfd["birth_order"] == "total"
    hfd_is_early = tb_hfd["year"] < YEAR_WPP_START
    tb_hfd = tb_hfd.loc[hfd_is_total & hfd_is_early, index_columns + [COLUMN_NAME_HFD]]
    tb_hfd = tb_hfd.rename(columns={COLUMN_NAME_HFD: COLUMN_NEW_NAME})

    # Stack HFD rows on top of UN rows.
    tb = pr.concat([tb_hfd, tb_un], ignore_index=True, short_name=TABLE_NAME_NEW)

    # Historical variant: same values, blanked out after YEAR_WPP_PROJ_START.
    tb[hist_column] = tb[COLUMN_NEW_NAME].copy()
    is_after_cutoff = tb["year"] > YEAR_WPP_PROJ_START
    tb.loc[is_after_cutoff, hist_column] = pd.NA

    # Format
    tb = tb.format(["country", "year"])

    #
    # Save outputs.
    #
    # Create the new garden dataset from the combined table.
    ds_garden = create_dataset(
        dest_dir,
        tables=[tb],
        check_variables_metadata=True,
    )

    # Save changes in the new garden dataset.
    ds_garden.save()
35 changes: 22 additions & 13 deletions etl/steps/data/garden/hmd/2024-11-19/hfd.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,13 +529,17 @@ def add_shifted_to_cohort(tb):
}
)

# TODO: move elsewhere
# Build special table
years = list(range(1925, tb_period_ages["year"].max() + 1, 5))
tb_period_years = tb_period_ages.loc[
tb_period_ages["year"].isin(years) & (tb_period_ages["birth_order"] == "total")
].drop(columns=["birth_order"])
# Special table: Distribution of period metrics
## Keep only birth_order = total
tb_period_years = tb_period_ages.loc[(tb_period_ages["birth_order"] == "total")].drop(columns=["birth_order"])
## Keep only cohorts that are multiples of 5 (from year_min to year_max)
year_min = tb_period_years.loc[tb_period_years["year"] % 5 == 0, "year"].min()
year_max = tb_period_years["year"].max() + 1
years = list(range(year_min, year_max, 5))
tb_period_years = tb_period_years.loc[tb_period_years["year"].isin(years)]
## Change age group names 12- -> 12, 55+ -> 55
tb_period_years["age"] = tb_period_years["age"].str.replace("-", "").str.replace("+", "").astype("UInt8")
## HOTFIX: Name of the dimension
tb_period_years = tb_period_years.rename(
columns={
"year": "year_as_dimension",
Expand Down Expand Up @@ -579,14 +583,19 @@ def add_shifted_to_cohort(tb):
}
)

# TODO: move elsewhere
# Build special table
years = list(range(1925, tb_cohort_ages["cohort"].max() + 1, 5))
tb_cohort_years = tb_cohort_ages.loc[
tb_cohort_ages["cohort"].isin(years) & (tb_cohort_ages["birth_order"] == "total")
].drop(columns=["birth_order"])
# Special table: Distribution of cohort metrics
## Keep only birth_order = total
tb_cohort_years = tb_cohort_ages.loc[(tb_cohort_ages["birth_order"] == "total")].drop(columns=["birth_order"])
## Keep only cohorts that are multiples of 5 (from year_min to year_max)
year_min = tb_cohort_years.loc[tb_cohort_years["cohort"] % 5 == 0, "cohort"].min()
year_max = tb_cohort_years["cohort"].max() + 1
years = list(range(year_min, year_max, 5))
tb_cohort_years = tb_cohort_years.loc[tb_cohort_years["cohort"].isin(years)]
## Change age group names 12- -> 12, 55+ -> 55
tb_cohort_years["age"] = tb_cohort_years["age"].str.replace("-", "").str.replace("+", "").astype("UInt8")
# Fix 12- vs 12, 55+ vs 55 etc.
# 'asfr_cohort' and 'ccfr_cohort' don't always use the same names for the same age groups. E.g. 12- vs 12, 55+ vs 55 etc.
# Therefore, these age groups are not aligned after the merge. We fix this by grouping + averaging.
# The following check ensures that this is actually the case, so that the groupby.mean makes sense!
assert tb_cohort_years.groupby(["country", "cohort", "age"])["asfr_cohort"].nunique().max() == 1
assert tb_cohort_years.groupby(["country", "cohort", "age"])["ccfr_cohort"].nunique().max() == 1
tb_cohort_years = tb_cohort_years.groupby(["country", "cohort", "age"], as_index=False).mean()
Expand Down
38 changes: 15 additions & 23 deletions etl/steps/data/garden/un/2024-07-12/un_wpp.meta.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
definitions:
global:
projections:
<%- if (variant is defined) and (variant != 'estimates') -%>
Projections from 2024 onwards are based on the UN's << variant >> scenario.
projections: <%- if (variant is defined) and (variant != 'estimates') -%>
Projections from 2024 onwards are based on the UN's << variant >> scenario.
<%- endif -%>
dimensions:
title: |-
Expand Down Expand Up @@ -65,13 +64,10 @@ definitions:
subtitle: "{definitions.global.projections}"
originUrl: "https://ourworldindata.org/population-growth"


# this metadata file is not used in garden step, but in grapher step
tables:
population:

variables:

population:
title: Population
unit: people
Expand All @@ -83,7 +79,6 @@ tables:
grapher_config:
note: "Values as of 1 July of the indicated year."


population_change:
title: Population change
unit: people
Expand Down Expand Up @@ -153,7 +148,6 @@ tables:
subtitle: |-
The natural growth rate is the population change determined by births and deaths. Migration flows are not taken into account. {definitions.global.projections}
fertility_rate:
variables:
fertility_rate:
Expand Down Expand Up @@ -232,8 +226,7 @@ tables:
title_public: |-
Death rate, {definitions.global.dimensions.title}
grapher_config:
subtitle:
The number of deaths occurring during the year, per 1,000 people. {definitions.global.projections}
subtitle: The number of deaths occurring during the year, per 1,000 people. {definitions.global.projections}

births:
variables:
Expand Down Expand Up @@ -273,18 +266,18 @@ tables:
subtitle: |-
The median age divides the population into two parts of equal size; that is, there are as many people with ages above the median age as there are with ages below. {definitions.global.projections}
# childbearing_age:
# variables:
# childbearing_age:
# title: Mean age at childbearing
# unit: years
# description_short: &cb_description_short |-
# Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year. {definitions.global.dimensions.description_short}
# presentation:
# title_public: |-
# Mean age at childbearing, {definitions.global.dimensions.title}
# grapher_config:
# subtitle: *cb_description_short
mean_age_childbearing:
variables:
mean_age_childbearing:
title: Mean age at childbearing
unit: years
description_short: &cb_description_short |-
Mean age of mothers at the birth of their children if women were subject throughout their lives to the age-specific fertility rates observed in a given year. {definitions.global.dimensions.description_short}
presentation:
title_public: |-
Mean age at childbearing, {definitions.global.dimensions.title}
grapher_config:
subtitle: *cb_description_short

life_expectancy:
variables:
Expand Down Expand Up @@ -384,7 +377,6 @@ tables:
Old-age dependency ratio
<%-endif -%>, {definitions.global.dimensions.title}
mortality_rate:
variables:
mortality_rate:
Expand Down
8 changes: 7 additions & 1 deletion etl/steps/data/garden/un/2024-07-12/un_wpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def run(dest_dir: str) -> None:
tb_median_age = ds_meadow["median_age"].reset_index()
tb_le = ds_meadow["life_expectancy"].reset_index()
tb_mortality = ds_meadow["mortality_rate"].reset_index()
# tb_childbearing_age = ds_meadow["childbearing_age"].reset_index()
tb_childbearing_age = ds_meadow["mean_age_childbearing"].reset_index()

#
# Process data.
Expand Down Expand Up @@ -107,6 +107,11 @@ def run(dest_dir: str) -> None:
tb_mortality = set_variant_to_estimates(tb_mortality)
tb_mortality = tb_mortality.format(COLUMNS_INDEX)

## Mean age at childbearing
tb_childbearing_age = process_standard(tb_childbearing_age)
tb_childbearing_age = set_variant_to_estimates(tb_childbearing_age)
tb_childbearing_age = tb_childbearing_age.format(COLUMNS_INDEX)

# Build tables list for dataset
tables = [
tb_population,
Expand All @@ -121,6 +126,7 @@ def run(dest_dir: str) -> None:
tb_sex_ratio,
tb_mortality,
tb_dependency,
tb_childbearing_age,
]

#
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Load a garden dataset and create a grapher dataset."""

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Read the garden table and save it as a grapher dataset.

    Args:
        dest_dir: Destination directory passed on to `create_dataset`.
    """
    #
    # Load inputs.
    #
    # Garden dataset this step depends on.
    garden_dataset = paths.load_dataset("mean_age_childbearing")

    # Table to publish, read with `reset_index=False`.
    table = garden_dataset.read("mean_age_childbearing", reset_index=False)

    #
    # Save outputs.
    #
    # The grapher dataset takes its default metadata from the garden dataset.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[table],
        check_variables_metadata=True,
        default_metadata=garden_dataset.metadata,
    )

    # Persist the new grapher dataset.
    grapher_dataset.save()
2 changes: 1 addition & 1 deletion etl/steps/data/grapher/un/2024-07-12/un_wpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def run(dest_dir: str) -> None:
ds_garden["sex_ratio"],
ds_garden["mortality_rate"],
ds_garden["dependency_ratio"],
# ds_garden["childbearing_age"],
ds_garden["mean_age_childbearing"],
]
#
# Save outputs.
Expand Down
3 changes: 2 additions & 1 deletion etl/steps/data/meadow/un/2024-07-12/un_wpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ def run(dest_dir: str) -> None:
tb_death_rate = clean_table_standard_xlsx(tb_main, "Crude Death Rate (deaths per 1,000 population)", "death_rate")
tb_birth_rate = clean_table_standard_xlsx(tb_main, "Crude Birth Rate (births per 1,000 population)", "birth_rate")
tb_median_age = clean_table_standard_xlsx(tb_main, "Median Age, as of 1 July (years)", "median_age")
tb_macb = clean_table_standard_xlsx(tb_main, "Mean Age Childbearing (years)", "mean_age_childbearing")
tb_mortality = make_tb_mortality(tb_main)
tb_le = make_tb_life_expectancy(tb_main)

Expand Down Expand Up @@ -115,7 +116,7 @@ def run(dest_dir: str) -> None:
tb_median_age,
tb_le,
tb_mortality,
# tb_childbearing_age,
tb_macb,
# tb_population_doubling,
]
# Create a new meadow dataset with the same metadata as the snapshot.
Expand Down
Loading

0 comments on commit 212e752

Please sign in to comment.