Skip to content

Commit

Permalink
Merge branch 'master' into wizard-anomalist
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasrodes committed Oct 16, 2024
2 parents 786f81a + 486bb78 commit aab0dc2
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 32 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ def run(dest_dir: str) -> None:

tb_dac1 = remove_jumps_in_the_data_and_unneeded_cols(tb=tb_dac1)

tb_dac1 = limit_grant_equivalents_from_2018_only(tb=tb_dac1)

tb = add_donor_data_from_recipient_dataset(tb_donor=tb_dac1, tb_recipient=tb_dac2a)

tb = add_recipient_dataset(tb=tb, tb_recipient=tb_dac2a)
Expand Down Expand Up @@ -525,7 +527,7 @@ def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
This is most likely because of aggregations of population and GNI not properly done by the source.
This is a temporary solution until the source fixes the data. It is already reported.
# Also, remove redundant columns.
Also, remove redundant columns.
"""

# For i_oda_net_disbursements_share_gni
Expand All @@ -545,3 +547,18 @@ def remove_jumps_in_the_data_and_unneeded_cols(tb: Table) -> Table:
)

return tb


def limit_grant_equivalents_from_2018_only(tb: Table) -> Table:
    """
    Keep grant equivalent indicators only for 2018 and later years.

    The input table is copied, so the caller's table is left unmodified. In the copy, every column
    whose name contains "grant_equivalents" is set to None (missing) in the rows where "year" is
    earlier than 2018. All other columns, and all rows from 2018 onwards, are returned as they were.
    """

    result = tb.copy()

    # Grant equivalent indicators are identified purely by their column name.
    columns_to_blank = [name for name in result.columns if "grant_equivalents" in name]

    # Rows dated before 2018, where grant equivalent values must not be kept.
    is_before_2018 = result["year"] < 2018

    result.loc[is_before_2018, columns_to_blank] = None

    return result
47 changes: 41 additions & 6 deletions etl/steps/data/garden/wb/2024-10-07/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@

non_market_income_description = "Non-market sources of income, including food grown by subsistence farmers for their own consumption, are taken into account."

nowcast_regions_description = "Regional and global estimates are extrapolated up until the year of the data release using GDP growth estimates and forecasts. For more details about the methodology, please refer to the [World Bank PIP documentation](https://datanalytics.worldbank.org/PIP-Methodology/lineupestimates.html#nowcasts)."

relative_poverty_description = "This is a measure of _relative_ poverty – it captures the share of people whose income is low by the standards typical in their own country."

ppp_description = "The data is measured in international-$ at {ppp} prices – this adjusts for inflation and for differences in the cost of living between countries."

prosperity_gap_description = "The _prosperity gap_ indicator measures how much income would need to be multiplied to ensure everyone reaches a standard of prosperity, which is defined as $25 per person per day. This gives a sense of how far global incomes are from a basic prosperity standard."

processing_description_relative_poverty = "Measures of relative poverty are not directly available in the World Bank PIP data. To calculate this metric we take the median income or consumption for the country and year, calculate a relative poverty line – in this case {povline} of the median – and then run a specific query on the PIP API to return the share of population below that line."

processing_description_thr = "Income and consumption thresholds by decile are not directly available in the World Bank PIP API. We extract the metric primarily from [auxiliary percentiles data provided by the World Bank](https://datacatalog.worldbank.org/search/dataset/0063646). Missing country values and regional aggregations of the indicator are calculated by running multiple queries on the API to obtain the closest poverty line to each threshold."
Expand All @@ -19,10 +27,6 @@

processing_description_avg = "Income and consumption averages by decile are not directly available in the World Bank PIP API. We calculate the metric by multiplying the share of each decile by the mean income or consumption of the distribution and dividing by the population share of the decile (10%)."

relative_poverty_description = "This is a measure of _relative_ poverty – it captures the share of people whose income is low by the standards typical in their own country."

ppp_description = "The data is measured in international-$ at {ppp} prices – this adjusts for inflation and for differences in the cost of living between countries."


# Define default tolerance for each variable
TOLERANCE = 5
Expand Down Expand Up @@ -102,7 +106,7 @@
},
"pg": {
"title": "Prosperity gap",
"description": "Average factor by which incomes need to be multiplied to bring everyone in the world to the prosperity standard of $25 per person per day.",
"description": "Average factor by which incomes need to be multiplied to bring everyone to the prosperity standard of $25 per person per day.",
"unit": "",
"short_unit": "",
"numDecimalPlaces": 1,
Expand Down Expand Up @@ -581,12 +585,37 @@ def var_metadata_inequality_mean_median(var, origins, welfare_type) -> VariableM
ppp_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
],
description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
unit=var_dict[var]["unit"],
short_unit=var_dict[var]["short_unit"],
origins=origins,
)
meta.display = {
"name": meta.title,
"numDecimalPlaces": var_dict[var]["numDecimalPlaces"],
"tolerance": TOLERANCE,
}

meta.presentation = VariablePresentationMeta(title_public=meta.title)

elif var in ["top1_thr", "top1_avg", "spl", "spr"]:
meta = VariableMeta(
title=f"{var_dict[var]['title']}",
description_short=var_dict[var]["description"],
description_key=[
ppp_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
],
description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
unit=var_dict[var]["unit"],
short_unit=var_dict[var]["short_unit"],
origins=origins,
)

meta.display = {
"name": meta.title,
"numDecimalPlaces": var_dict[var]["numDecimalPlaces"],
Expand All @@ -595,14 +624,16 @@ def var_metadata_inequality_mean_median(var, origins, welfare_type) -> VariableM

meta.presentation = VariablePresentationMeta(title_public=meta.title)

elif var in ["top1_thr", "top1_avg", "spl", "spr", "pg"]:
elif var in ["pg"]:
meta = VariableMeta(
title=f"{var_dict[var]['title']}",
description_short=var_dict[var]["description"],
description_key=[
prosperity_gap_description,
ppp_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
],
description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
unit=var_dict[var]["unit"],
Expand Down Expand Up @@ -676,6 +707,7 @@ def var_metadata_absolute_povlines(var, povline, origins, ppp_version, welfare_t
ppp_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
]

# Remove empty strings from the list
Expand Down Expand Up @@ -713,6 +745,7 @@ def var_metadata_between_absolute_povlines(var, povline1, povline2, origins, ppp
ppp_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
],
description_processing=f"""{inc_cons_dict[welfare_type]['processing_description']}""",
unit=var_dict[var]["unit"].replace("{ppp}", str(ppp_version)),
Expand Down Expand Up @@ -751,6 +784,7 @@ def var_metadata_relative_povlines(var, rel, origins, welfare_type) -> VariableM
relative_poverty_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
],
description_processing=f"""{processing_description_relative_poverty}
Expand Down Expand Up @@ -783,6 +817,7 @@ def var_metadata_percentiles(var, pct, origins, ppp_version, welfare_type) -> Va
ppp_description,
inc_cons_dict[welfare_type]["description"],
non_market_income_description,
nowcast_regions_description,
],
description_processing=f"""{processing_description_thr}
Expand Down
2 changes: 0 additions & 2 deletions etl/steps/data/garden/wb/2024-10-07/world_bank_pip.meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,5 @@ tables:
description_key:
- The regions employed by the World Bank PIP differ from the regional groupings used by the World Bank in other contexts.
- Some economies, mostly high-income economies, are excluded from the geographical regions and are included as a separate group referred to as “other high income” (or “industrialized economies” or “rest of the world” in earlier publications).
display:
tolerance: 40
presentation:
title_public: World regions according to World Bank Poverty and Inequality Platform
4 changes: 2 additions & 2 deletions snapshots/climate/latest/weekly_wildfires.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ meta:
citation_full: Global Wildfire Information System
attribution_short: GWIS
url_main: https://gwis.jrc.ec.europa.eu/apps/gwis.statistics/seasonaltrend
date_accessed: 2024-10-15
date_published: 2024-10-15
date_accessed: 2024-10-16
date_published: 2024-10-16
license:
name: CC BY 4.0
url: https://gwis.jrc.ec.europa.eu/about-gwis/data-license
Expand Down
2 changes: 1 addition & 1 deletion snapshots/covid/latest/cases_deaths.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ meta:
version_producer: WHO COVID-19 Dashboard - Daily cases and deaths
url_main: https://covid19.who.int/
url_download: https://covid19.who.int/WHO-COVID-19-global-data.csv
date_accessed: 2024-10-15
date_accessed: 2024-10-16
date_published: '2024-07-07'
license:
name: CC BY 4.0
Expand Down
2 changes: 1 addition & 1 deletion snapshots/excess_mortality/latest/hmd_stmf.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ meta:
HMD provides an online STMF visualization toolkit (https://mpidr.shinyapps.io/stmortality).
url: https://www.mortality.org/Data/STMF
source_data_url: https://www.mortality.org/File/GetDocument/Public/STMF/Outputs/stmf.csv
date_accessed: 2024-10-15
date_accessed: 2024-10-16
publication_date: 2024-09-30
publication_year: 2024
published_by: |-
Expand Down
2 changes: 1 addition & 1 deletion snapshots/excess_mortality/latest/wmd.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ meta:
Published paper available at https://elifesciences.org/articles/69336.
url: https://github.com/akarlinsky/world_mortality/
source_data_url: https://raw.githubusercontent.com/akarlinsky/world_mortality/main/world_mortality.csv
date_accessed: 2024-10-15
date_accessed: 2024-10-16
publication_date: '2021-06-30'
publication_year: 2021
published_by: |-
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ meta:
For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic.
url: https://github.com/dkobak/excess-mortality
source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-per-year.csv
date_accessed: 2024-10-15
date_accessed: 2024-10-16
publication_date: '2021-06-30'
publication_year: 2021
published_by: |-
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ meta:
For more details, refer to https://github.com/dkobak/excess-mortality#excess-mortality-during-the-covid-19-pandemic.
url: https://github.com/dkobak/excess-mortality
source_data_url: https://raw.githubusercontent.com/dkobak/excess-mortality/main/baselines-stmf.csv
date_accessed: 2024-10-15
date_accessed: 2024-10-16
publication_date: '2021-06-30'
publication_year: 2021
published_by: |-
Expand Down
4 changes: 2 additions & 2 deletions snapshots/who/latest/fluid.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ meta:
The platform accommodates both qualitative and quantitative data which facilitates the tracking of global trends, spread, intensity, and impact of influenza. These data are made freely available to health policy makers in order to assist them in making informed decisions regarding the management of influenza.
wdir: ../../../data/snapshots/who/latest
outs:
- md5: abc6873780d866491328545f6dd3b4e7
size: 165789281
- md5: aa91ea6a8ed33a292919a124f46cfd67
size: 165814784
path: fluid.csv
4 changes: 2 additions & 2 deletions snapshots/who/latest/flunet.csv.dvc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ meta:
The data are provided remotely by National Influenza Centres (NICs) of the Global Influenza Surveillance and Response System (GISRS) and other national influenza reference laboratories collaborating actively with GISRS, or are uploaded from WHO regional databases.
wdir: ../../../data/snapshots/who/latest
outs:
- md5: 11feb754fc65574996285d4481a0e7e5
size: 26890129
- md5: 5f93578131a7ac494148a52f4fedfe4f
size: 26895005
path: flunet.csv

0 comments on commit aab0dc2

Please sign in to comment.