Skip to content

Commit

Permalink
add projections
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Dec 18, 2024
1 parent 748c2f3 commit 68b61fb
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 16 deletions.
3 changes: 3 additions & 0 deletions dag/demography.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ steps:
- snapshot://un/2024-12-02/un_wpp_lt_m.csv
- snapshot://un/2024-12-02/un_wpp_lt_all.csv
- snapshot://un/2024-12-02/un_wpp_lt_f.csv
- snapshot://un/2024-12-02/un_wpp_lt_proj_m.csv
- snapshot://un/2024-12-02/un_wpp_lt_proj_all.csv
- snapshot://un/2024-12-02/un_wpp_lt_proj_f.csv
data://garden/un/2024-12-02/un_wpp_lt:
- data://meadow/un/2024-12-02/un_wpp_lt

Expand Down
4 changes: 3 additions & 1 deletion etl/steps/data/garden/un/2024-12-02/un_wpp_lt.meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dataset:
# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/tables/
tables:
un_wpp_lt:
un_wpp_lt: &table_metadata
variables:
central_death_rate:
title: Central death rate
Expand Down Expand Up @@ -102,3 +102,5 @@ tables:
<%- else -%>
It refers to the remaining life expectancy for people who have already survived to the given age.
<%- endif -%>
un_wpp_lt_proj: *table_metadata
43 changes: 29 additions & 14 deletions etl/steps/data/garden/un/2024-12-02/un_wpp_lt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@
"time": "year",
"agegrpstart": "age",
}
COLUMNS_INDEX = [
"location",
"year",
"sex",
"age",
]
COLUMNS_INDEX = ["location", "year", "sex", "age", "variant"]
COLUMNS_INDICATORS = [
"central_death_rate",
"probability_of_death",
Expand All @@ -30,6 +25,8 @@
"life_expectancy",
"average_survival_length",
]
# Year threshold for projections
YEAR_PROJ_START = 2024


def run(dest_dir: str) -> None:
Expand All @@ -43,12 +40,16 @@ def run(dest_dir: str) -> None:
paths.log.info("load tables, concatenate.")
tb = pr.concat(
[
ds_meadow["un_wpp_lt_all"].reset_index(),
ds_meadow["un_wpp_lt_f"].reset_index(),
ds_meadow["un_wpp_lt_m"].reset_index(),
ds_meadow.read("un_wpp_lt_all"),
ds_meadow.read("un_wpp_lt_f"),
ds_meadow.read("un_wpp_lt_m"),
ds_meadow.read("un_wpp_lt_proj_all"),
ds_meadow.read("un_wpp_lt_proj_f"),
ds_meadow.read("un_wpp_lt_proj_m"),
],
short_name=paths.short_name,
).reset_index()
ignore_index=True,
)

#
# Process data.
Expand All @@ -64,7 +65,7 @@ def run(dest_dir: str) -> None:
# DTypes
tb = tb.astype(
{
"age": str,
"age": "string",
}
)

Expand All @@ -79,21 +80,35 @@ def run(dest_dir: str) -> None:

# Harmonize country names.
paths.log.info("harmonise country names.")
tb = geo.harmonize_countries(df=tb, countries_file=paths.country_mapping_path, country_col="location")
tb = geo.harmonize_countries(
df=tb,
countries_file=paths.country_mapping_path,
country_col="location",
)

# Harmonize sex values
tb["sex"] = tb["sex"].map({"Total": "total", "Male": "male", "Female": "female"})
assert tb["sex"].notna().all(), "NaNs detected after mapping sex values!"

# Historical and Projection-only tables
tb_hist = tb.loc[tb["year"] < YEAR_PROJ_START]
tb_future = tb.loc[tb["year"] >= YEAR_PROJ_START]

# Set index
tb = tb.set_index(COLUMNS_INDEX, verify_integrity=True)[COLUMNS_INDICATORS]
tables = [
tb_hist.format(COLUMNS_INDEX, short_name="un_wpp_lt"),
tb_future.format(COLUMNS_INDEX, short_name="un_wpp_lt_proj"),
]

#
# Save outputs.
#
# Create a new garden dataset with the same metadata as the meadow dataset.
ds_garden = create_dataset(
dest_dir, tables=[tb], check_variables_metadata=True, default_metadata=ds_meadow.metadata
dest_dir,
tables=tables,
check_variables_metadata=True,
default_metadata=ds_meadow.metadata,
)

# Save changes in the new garden dataset.
Expand Down
5 changes: 4 additions & 1 deletion etl/steps/data/meadow/un/2024-12-02/un_wpp_lt.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ def run(dest_dir: str) -> None:
"un_wpp_lt_all", # ALL
"un_wpp_lt_f", # FEMALE
"un_wpp_lt_m", # MALE
"un_wpp_lt_proj_all", # PROJECTIONS, ALL
"un_wpp_lt_proj_f", # PROJECTIONS, FEMALE
"un_wpp_lt_proj_m", # PROJECTIONS, MALE
]

tables = []
Expand All @@ -53,7 +56,7 @@ def run(dest_dir: str) -> None:
tb["LocTypeName"].isin(["Geographic region", "Income group", "Country/Area", "World", "Development group"])
]
# Set index
tb = tb.format(["location", "time", "sex", "agegrp", "loctypename"])
tb = tb.format(["location", "time", "sex", "agegrp", "loctypename", "variant"])
# Add to tables list
tables.append(tb)

Expand Down

0 comments on commit 68b61fb

Please sign in to comment.